{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Weighted sample of fiction\n",
    "\n",
    "What if we selected titles proportional to their representation in the library? Instead of giving each title the same chance to be selected, we could give chances proportional to the number of contemporary copies in Hathi, where \"contemporary\" means \"within 25 years of first appearance.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# first, we \"import\" some modules we'll be using\n",
    "\n",
    "import pandas as pd\n",
    "import random\n",
    "from collections import Counter\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now actually read in the metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>docid</th>\n",
       "      <th>oldauthor</th>\n",
       "      <th>author</th>\n",
       "      <th>authordate</th>\n",
       "      <th>inferreddate</th>\n",
       "      <th>latestcomp</th>\n",
       "      <th>datetype</th>\n",
       "      <th>startdate</th>\n",
       "      <th>enddate</th>\n",
       "      <th>imprint</th>\n",
       "      <th>...</th>\n",
       "      <th>recordid</th>\n",
       "      <th>instances</th>\n",
       "      <th>allcopiesofwork</th>\n",
       "      <th>copiesin25yrs</th>\n",
       "      <th>enumcron</th>\n",
       "      <th>volnum</th>\n",
       "      <th>title</th>\n",
       "      <th>parttitle</th>\n",
       "      <th>earlyedition</th>\n",
       "      <th>shorttitle</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>mdp.39015031913893</td>\n",
       "      <td>Spencer, Louise Reid</td>\n",
       "      <td>Spencer, Louise Reid</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2100</td>\n",
       "      <td>|</td>\n",
       "      <td>||||</td>\n",
       "      <td>||||</td>\n",
       "      <td>Thomas Y. Crowell company|1945</td>\n",
       "      <td>...</td>\n",
       "      <td>8744</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Guerrilla wife | $c: [by] Louise Reid Spencer.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>Guerrilla wife</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>mdp.39015003936864</td>\n",
       "      <td>Baker, Robert H</td>\n",
       "      <td>Baker, Robert H</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2100</td>\n",
       "      <td>n</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>Port Washington, N.Y.|Ashley Books|197-?].</td>\n",
       "      <td>...</td>\n",
       "      <td>39757</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>The suburbs : | a novel / | $c: by Robert H. B...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>The suburbs : a novel</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>mdp.39015068342305</td>\n",
       "      <td>Dickens, Charles</td>\n",
       "      <td>Dickens, Charles</td>\n",
       "      <td>1812-1870.</td>\n",
       "      <td>0</td>\n",
       "      <td>1870</td>\n",
       "      <td>n</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>New York|The American news company|n.d.</td>\n",
       "      <td>...</td>\n",
       "      <td>119996</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Edwin Drood. | $c: By Charles Dickens. With il...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>Edwin Drood</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>mdp.39015055066586</td>\n",
       "      <td>Stretton, Hesba</td>\n",
       "      <td>Stretton, Hesba</td>\n",
       "      <td>1832-1911.</td>\n",
       "      <td>0</td>\n",
       "      <td>1911</td>\n",
       "      <td>n</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>New York|Dodd, Mead &amp; co.|n.d.</td>\n",
       "      <td>...</td>\n",
       "      <td>122460</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Carola, | $c: by Hesba Stretton.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>Carola</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>mdp.39015055066594</td>\n",
       "      <td>Stretton,Hesba</td>\n",
       "      <td>Stretton, Hesba</td>\n",
       "      <td>1832-1911.</td>\n",
       "      <td>0</td>\n",
       "      <td>1911</td>\n",
       "      <td>n</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>New York|Dodd, Mead &amp; co.|n.d.</td>\n",
       "      <td>...</td>\n",
       "      <td>122464</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>In prison &amp; out. | $c: By Hesba Stretton.</td>\n",
       "      <td>NaN</td>\n",
       "      <td>True</td>\n",
       "      <td>In prison &amp; out</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                docid             oldauthor                author  authordate  \\\n",
       "0  mdp.39015031913893  Spencer, Louise Reid  Spencer, Louise Reid         NaN   \n",
       "1  mdp.39015003936864       Baker, Robert H       Baker, Robert H         NaN   \n",
       "2  mdp.39015068342305      Dickens, Charles      Dickens, Charles  1812-1870.   \n",
       "3  mdp.39015055066586       Stretton, Hesba       Stretton, Hesba  1832-1911.   \n",
       "4  mdp.39015055066594        Stretton,Hesba       Stretton, Hesba  1832-1911.   \n",
       "\n",
       "   inferreddate  latestcomp datetype startdate enddate  \\\n",
       "0             0        2100        |      ||||    ||||   \n",
       "1             0        2100        n                     \n",
       "2             0        1870        n                     \n",
       "3             0        1911        n                     \n",
       "4             0        1911        n                     \n",
       "\n",
       "                                      imprint          ...           recordid  \\\n",
       "0              Thomas Y. Crowell company|1945          ...               8744   \n",
       "1  Port Washington, N.Y.|Ashley Books|197-?].          ...              39757   \n",
       "2     New York|The American news company|n.d.          ...             119996   \n",
       "3              New York|Dodd, Mead & co.|n.d.          ...             122460   \n",
       "4              New York|Dodd, Mead & co.|n.d.          ...             122464   \n",
       "\n",
       "  instances allcopiesofwork copiesin25yrs enumcron volnum  \\\n",
       "0         1               1             1      NaN    NaN   \n",
       "1         1               1             1      NaN    NaN   \n",
       "2         1               1             1      NaN    NaN   \n",
       "3         1               1             1      NaN    NaN   \n",
       "4         1               1             1      NaN    NaN   \n",
       "\n",
       "                                               title parttitle  earlyedition  \\\n",
       "0     Guerrilla wife | $c: [by] Louise Reid Spencer.       NaN          True   \n",
       "1  The suburbs : | a novel / | $c: by Robert H. B...       NaN          True   \n",
       "2  Edwin Drood. | $c: By Charles Dickens. With il...       NaN          True   \n",
       "3                   Carola, | $c: by Hesba Stretton.       NaN          True   \n",
       "4          In prison & out. | $c: By Hesba Stretton.       NaN          True   \n",
       "\n",
       "              shorttitle  \n",
       "0         Guerrilla wife  \n",
       "1  The suburbs : a novel  \n",
       "2            Edwin Drood  \n",
       "3                 Carola  \n",
       "4        In prison & out  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta = pd.read_csv('../noveltmmeta/workmeta.tsv', sep = '\\t', low_memory = False)\n",
    "meta.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(138137, 28)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta.shape\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's only use volumes that are published near their author's lifespan."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(129023, 28)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "meta = meta.loc[meta.earlyedition == True, : ]\n",
    "meta.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Histogram by number of copies\n",
    "\n",
    "First, let's explore the basic concept of the thing we're attempting. \n",
    "\n",
    "Our first sample gave each *title* an equal chance of appearing, even though some titles appear many times in Hathi, and some only once.\n",
    "\n",
    "We want to create a sample where often-reprinted works have a greater chance of appearing. Occurrences in Hathi are by no means exactly == actual # of reprints, but we trust that there is some relationship. We're only going to count reprints in the first 25 years, because long-term canonicity is a different question, and might require yet another sample.\n",
    "\n",
    "To start, let's consider how titles are distributed across different levels of representation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>docid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>copiesin25yrs</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>88077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1598</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               docid\n",
       "copiesin25yrs       \n",
       "1              88077\n",
       "2              23147\n",
       "3              10290\n",
       "4               3317\n",
       "5               1598"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bycopies = meta.groupby('copiesin25yrs').agg({'docid': 'count'})\n",
    "bycopies.head()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We're going to want to sample titles with a probability proportional to the total number of copies. Let's generate that probability:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>docid</th>\n",
       "      <th>totalcopies</th>\n",
       "      <th>cumulative</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>copiesin25yrs</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>88077</td>\n",
       "      <td>0.423297</td>\n",
       "      <td>0.423297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>23147</td>\n",
       "      <td>0.222488</td>\n",
       "      <td>0.645785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10290</td>\n",
       "      <td>0.148361</td>\n",
       "      <td>0.794145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3317</td>\n",
       "      <td>0.063766</td>\n",
       "      <td>0.857911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1598</td>\n",
       "      <td>0.038400</td>\n",
       "      <td>0.896311</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1048</td>\n",
       "      <td>0.030220</td>\n",
       "      <td>0.926531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>466</td>\n",
       "      <td>0.015677</td>\n",
       "      <td>0.942208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>308</td>\n",
       "      <td>0.011842</td>\n",
       "      <td>0.954050</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>210</td>\n",
       "      <td>0.009083</td>\n",
       "      <td>0.963133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>138</td>\n",
       "      <td>0.006632</td>\n",
       "      <td>0.969766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>93</td>\n",
       "      <td>0.004917</td>\n",
       "      <td>0.974682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>96</td>\n",
       "      <td>0.005536</td>\n",
       "      <td>0.980219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>51</td>\n",
       "      <td>0.003186</td>\n",
       "      <td>0.983405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>33</td>\n",
       "      <td>0.002220</td>\n",
       "      <td>0.985625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>26</td>\n",
       "      <td>0.001874</td>\n",
       "      <td>0.987500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>31</td>\n",
       "      <td>0.002384</td>\n",
       "      <td>0.989883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>3</td>\n",
       "      <td>0.000245</td>\n",
       "      <td>0.990129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>16</td>\n",
       "      <td>0.001384</td>\n",
       "      <td>0.991513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>8</td>\n",
       "      <td>0.000731</td>\n",
       "      <td>0.992243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>23</td>\n",
       "      <td>0.002211</td>\n",
       "      <td>0.994454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>7</td>\n",
       "      <td>0.000706</td>\n",
       "      <td>0.995160</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>3</td>\n",
       "      <td>0.000317</td>\n",
       "      <td>0.995478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>4</td>\n",
       "      <td>0.000442</td>\n",
       "      <td>0.995920</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>6</td>\n",
       "      <td>0.000692</td>\n",
       "      <td>0.996612</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>4</td>\n",
       "      <td>0.000481</td>\n",
       "      <td>0.997092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>7</td>\n",
       "      <td>0.000875</td>\n",
       "      <td>0.997967</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000130</td>\n",
       "      <td>0.998097</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>5</td>\n",
       "      <td>0.000673</td>\n",
       "      <td>0.998770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000139</td>\n",
       "      <td>0.998909</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>2</td>\n",
       "      <td>0.000288</td>\n",
       "      <td>0.999197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000154</td>\n",
       "      <td>0.999351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>1</td>\n",
       "      <td>0.000178</td>\n",
       "      <td>0.999529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>2</td>\n",
       "      <td>0.000471</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               docid  totalcopies  cumulative\n",
       "copiesin25yrs                                \n",
       "1              88077     0.423297    0.423297\n",
       "2              23147     0.222488    0.645785\n",
       "3              10290     0.148361    0.794145\n",
       "4               3317     0.063766    0.857911\n",
       "5               1598     0.038400    0.896311\n",
       "6               1048     0.030220    0.926531\n",
       "7                466     0.015677    0.942208\n",
       "8                308     0.011842    0.954050\n",
       "9                210     0.009083    0.963133\n",
       "10               138     0.006632    0.969766\n",
       "11                93     0.004917    0.974682\n",
       "12                96     0.005536    0.980219\n",
       "13                51     0.003186    0.983405\n",
       "14                33     0.002220    0.985625\n",
       "15                26     0.001874    0.987500\n",
       "16                31     0.002384    0.989883\n",
       "17                 3     0.000245    0.990129\n",
       "18                16     0.001384    0.991513\n",
       "19                 8     0.000731    0.992243\n",
       "20                23     0.002211    0.994454\n",
       "21                 7     0.000706    0.995160\n",
       "22                 3     0.000317    0.995478\n",
       "23                 4     0.000442    0.995920\n",
       "24                 6     0.000692    0.996612\n",
       "25                 4     0.000481    0.997092\n",
       "26                 7     0.000875    0.997967\n",
       "27                 1     0.000130    0.998097\n",
       "28                 5     0.000673    0.998770\n",
       "29                 1     0.000139    0.998909\n",
       "30                 2     0.000288    0.999197\n",
       "32                 1     0.000154    0.999351\n",
       "37                 1     0.000178    0.999529\n",
       "49                 2     0.000471    1.000000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "totalcopies = []\n",
    "for idx, row in bycopies.iterrows():\n",
    "    copies = int(idx) * row['docid']\n",
    "    totalcopies.append(copies)\n",
    "totalcopies = np.array(totalcopies) / np.sum(totalcopies)\n",
    "bycopies = bycopies.assign(totalcopies = totalcopies)\n",
    "\n",
    "cumulativeprob = []\n",
    "cumulate = 0\n",
    "for idx, row in bycopies.iterrows():\n",
    "    cumulate += row['totalcopies']\n",
    "    cumulativeprob.append(cumulate)\n",
    "    \n",
    "bycopies = bycopies.assign(cumulative = cumulativeprob)\n",
    "bycopies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now load our first sample\n",
    "\n",
    "Here every title had an equal chance of being picked."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2730, 28)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first = pd.read_csv('firstsample.tsv', sep = '\\t')\n",
    "first.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>docid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>copiesin25yrs</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>489</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               docid\n",
       "copiesin25yrs       \n",
       "1               1655\n",
       "2                489\n",
       "3                330\n",
       "4                126\n",
       "5                 34\n",
       "6                 33\n",
       "7                 28\n",
       "8                  9\n",
       "9                  6\n",
       "10                 5\n",
       "11                 4\n",
       "12                 5\n",
       "13                 4\n",
       "15                 1\n",
       "16                 1"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "firstbycopies = first.groupby('copiesin25yrs').agg({'docid': 'count'})\n",
    "firstbycopies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate a distribution proportional to copy counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 1163\n",
      "2 615\n",
      "3 401\n",
      "4 168\n",
      "5 101\n",
      "6 88\n",
      "7 31\n",
      "8 34\n",
      "9 31\n",
      "10 14\n",
      "11 18\n",
      "12 12\n",
      "13 7\n",
      "14 6\n",
      "15 6\n",
      "16 5\n",
      "17 0\n",
      "18 3\n",
      "19 2\n",
      "20 4\n",
      "21 3\n",
      "22 0\n",
      "23 1\n",
      "24 3\n",
      "25 2\n",
      "26 3\n",
      "27 0\n",
      "28 2\n",
      "29 0\n"
     ]
    }
   ],
   "source": [
    "copycounts = []\n",
    "cumulist = list(bycopies.cumulative)\n",
    "for i in range(0, 2730):\n",
    "    prob = random.uniform(0, 1)\n",
    "    n = next(x[0] for x in enumerate(cumulist) if x[1] > prob)\n",
    "    copycounts.append(n + 1)\n",
    "\n",
    "xs = []\n",
    "ys = []\n",
    "proportional = Counter(copycounts)\n",
    "for n in range(1, 30):\n",
    "    print(n, proportional[n])\n",
    "    xs.append(n)\n",
    "    ys.append(proportional[n])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now visualize the difference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pylab import rcParams\n",
    "rcParams['figure.figsize'] = 9, 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAFwCAYAAACSOICCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XmcXGWd7/HPr5NOwhrClpiFPY2sAkIiMkIrw35NcANE\nRVn0Cji4IonjlcidUUC5g46iwwgMKosRQUCRnUYQoygiSyIJYISEEGQxrAnp9HP/eKqTStOd3qrq\nVHd/3q9XvarqqXOqfwcC/c2znUgpIUmSVE8aii5AkiSpIwOKJEmqOwYUSZJUdwwokiSp7hhQJElS\n3TGgSJKkutNtQImIiyJiaUQ8UNb2loj4bUT8KSJ+HxF7l302MyIWRMS8iDi4rH2viHggIuZHxPmV\nvxRJkjRY9KQH5RLgkA5t5wJnppT2BM4EvgEQETsDRwE7AYcBF0RElM75HnBiSqkJaIqIjt8pSZIE\n9CCgpJTuBl7o0NwGjC693gRYXHo9DbgypdSaUloILACmRMQ4YKOU0r2l434IHNnP2iVJ0iA1vI/n\nfRa4KSLOAwJ4e6l9AvDbsuMWl9pagUVl7YtK7ZIkSW/Q10myJwOfTiltRQ4rF1euJEmSNNT1tQfl\noymlTwOklK6KiB+U2hcDk8qOm1hq66q9UxHhDYIkSRpEUkrR/VFr9LQHJUqPdosj4gCAiDiQPNcE\n4DrgmIgYERHbAjsAv08pPQ0si4gppUmzxwHXrusHppSG3OPMM88svAav3Wv32r12r9trr/SjL7rt\nQYmIy4FmYLOIeIK8aufjwLcjYhiwHPhEKVTMjYjZwFxgJXBKWlPZqcD/AKOAG1JKN/apYkmSNOh1\nG1BSSsd28dHenTWmlL4OfL2T9j8Cu/WqOkmSNCS5k2wdaW5uLrqEwnjtQ5PXPvQM1euGoX3tfRF9\nHRuqpohI9ViXJEnqvYggVWmSrCRJUs0YUCRJUt0xoEiSpLpjQJEkSXXHgCJJkuqOAUWSJNUdA4ok\nSao7BhRJklR3DCiSJKnuGFAkSVLdMaBIkqS6Y0CRJEl1x4AiSZLqjgFFkiTVHQOKJEmqOwYUSZJU\ndwwokiSp7hhQJElS3TGgSJKkumNAkSRJdceAUua3v4Vrrim6CkmSZEAps3AhXHZZ0VVIkiQDSpnJ\nk2HBgqKrkCRJkVIquoY3iIhURF3LlsH48fDyyxBR8x8vSdKgFBGklHr1m9UelDKjR8MGG8BTTxVd\niSRJQ5sBpYOmJod5JEkqWrcBJSIuioilEfFAh/Z/iYh5EfFgRJxd1j4zIhaUPju4rH2viHggIuZH\nxPmVvYzKcR6KJEnF60kPyiXAIeUNEdEMvBvYLaW0G/DNUvtOwFHATsBhwAURq2dzfA84MaXUBDRF\nxFrfWS8MKJIkFa/bgJJSuht4oUPzycDZKaXW0jHPltqnA1emlFpTSguBBcCUiBgHbJRSurd03A+B\nIytQf8UZUCRJKl5f56A0AftHxJyIuCMi3lpqnwA8WXbc4lLbBGBRWfuiUlvdMaBIklS84f04b0xK\n6W0RsQ/wU2C7ypVVnB12gMceg7Y2aHAKsSRJhehrQHkSuBogpXRvRKyKiM3IPSZblR03sdS2GJjU\nSXuXZs2atfp1c3Mzzc3NfSy1dzbcEMaMgUWLYKutuj9ekiStraWlhZaWln59R482aouIbYDrSxNi\niYhPABNSSmdGRBNwS0pp64jYGbgMmEoewrkFmJxSShExBzgNuBf4JfDtlNKNXfy8QjZqa3fAAfCV\nr8CBBxZWgiRJg0ZVNmqLiMuBe8grb56IiOOBi4HtIuJB4HLgOICU0lxgNjAXuAE4pSxpnApcBMwH\nFnQVTuqB81AkSSqWW9134pxz4Jln4LzzCitBkqRBw63uK8QeFEmSimVA6YQBRZKkYjnE04lXX4VN\nN4VXXoFhwworQ5KkQcEhngpZf33YYg
t44omiK5EkaWgyoHRh8mSYP7/oKiRJGpoMKF1wHookScUx\noHTBgCJJUnEMKF0woEiSVBwDShcMKJIkFcdlxl1YsQI23hhefhkaGwstRZKkAc1lxhU0ciSMHw8L\nFxZdiSRJQ48BZR0c5pEkqRgGlHUwoEiSVAwDyjoYUCRJKoYBZR0MKJIkFcOAsg4GFEmSiuEy43VY\nuRI23BBeeglGjCi6GkmSBiaXGVdYYyNMmgSPP150JZIkDS0GlG44zCNJUu0ZULphQJEkqfYMKN0w\noEiSVHsGlG4YUCRJqj0DSjcMKJIk1Z7LjLvR2pqXGr/wAqy3XtHVSJI08LjMuAqGD4ett4bHHiu6\nEkmShg4DSg84zCNJUm0ZUHrAgCJJUm0ZUHrAgCJJUm0ZUHqgqcmAIklSLXUbUCLioohYGhEPdPLZ\n5yOiLSI2LWubGRELImJeRBxc1r5XRDwQEfMj4vzKXUL12YMiSVJt9aQH5RLgkI6NETEROAj4W1nb\nTsBRwE7AYcAFEdG+rOh7wIkppSagKSLe8J31atIkeP55eOWVoiuRJGlo6DagpJTuBl7o5KP/AE7v\n0DYduDKl1JpSWggsAKZExDhgo5TSvaXjfggc2eeqa6yhAbbbDh59tOhKJEkaGvo0ByUipgFPppQe\n7PDRBODJsveLS20TgEVl7YtKbQOGwzySJNXO8N6eEBHrAV8iD+9UzaxZs1a/bm5uprm5uZo/rlsG\nFEmSeqalpYWWlpZ+fUePtrqPiK2B61NKu0fErsCtwKtAABPJPSVTgBMAUkpnl867ETiTPE/ljpTS\nTqX2Y4ADUkond/Hz6mar+3YXXghz5sDFFxddiSRJA0s1t7qP0oOU0kMppXEppe1SStuSh2v2TCk9\nA1wHHB0RIyJiW2AH4PcppaeBZRExpTRp9jjg2t4UWjR7UCRJqp2eLDO+HLiHvPLmiYg4vsMhiTXh\nZS4wG5gL3ACcUtYVcipwETAfWJBSurEyl1AbBhRJkmrHuxn3UFsbbLQRLFkCG29cdDWSJA0c3s24\nihoaYPvtXWosSVItGFB6wWEeSZJqw4DSCwYUSZJqw4DSCwYUSZJqw4DSCwYUSZJqw4DSCwYUSZJq\nw4DSC+PGwfLl8EJnt06UJEkVY0DphQjYYQd7USRJqjYDSi85zCNJUvUZUHqpqcmAIklStRlQeske\nFEmSqs+A0ksGFEmSqs+A0kvtAaXO7mUoSdKgYkDppc03z+HkueeKrkSSpMHLgNJLEQ7zSJJUbQaU\nPjCgSJJUXQaUPjCgSJJUXQaUPjCgSJJUXQaUPjCgSJJUXZHqcL1sRKR6rKvd88/DNtvAsmV50qwk\nSepaRJBS6tVvTHtQ+mDTTaGxEZ55puhKJEkanAwofeQwjyRJ1WNA6SMDiiRJ1WNA6SMDiiRJ1WNA\n6SMDiiRJ1WNA6SMDiiRJ1eMy4z5atgwmTICXXnKpsSRJ6+Iy4xoaPRrWXx+eeqroSiRJGny6DSgR\ncVFELI2IB8razo2IeRFxf0T8LCI2LvtsZkQsKH1+cFn7XhHxQETMj4jzK38ptecwjyRJ1dGTHpRL\ngEM6tN0M7JJS2gNYAMwEiIidgaOAnYDDgAsiVg+AfA84MaXUBDRFRMfvHHCamgwokiRVQ7cBJaV0\nN/BCh7ZbU0ptpbdzgIml19OAK1NKrSmlheTwMiUixgEbpZTuLR33Q+DICtRfKHtQJEmqjkrMQTkB\nuKH0egLwZNlni0ttE4BFZe2LSm0DmgFFkqTq6FdAiYh/BVamlK6oUD0DigFFkqTqGN7XEyPiY8Dh\nwLvKmhcDk8reTyy1ddXepVmzZq1+3dzcTHNzc19LrZoddoDHHoO2NmhwPZQkSQC0tLTQ0tLSr+/o\n0T4oEbENcH1KabfS+0OB84D9U0rPlR23M3AZMJU8hHMLMDmllCJiDnAacC/wS+DbKaUbu/h5db8P\nSr
vx42HOHNhqq6IrkSSpPlVlH5SIuBy4h7zy5omIOB74T2BD4JaIuC8iLgBIKc0FZgNzyfNSTilL\nGqcCFwHzgQVdhZOBxmEeSZIqz51k++mkk2DvveGTnyy6EkmS6pM7yRbAHhRJkirPgFIuJXj11V6d\nYkCRJKnyDCjlfvAD+NSnenWKAUWSpMpzDkq5v/4Vpk6FJUtg2LAenfLqq7DppvDKKz0+RZKkIcU5\nKP217bYwblxeN9xD668PW2wBTzxRxbokSRpiDCgdTZ8O117bq1Mc5pEkqbIMKB0ZUCRJKpwBpaO3\nvhVefhn+8pcen2JAkSSpsgwoHUXAtGm96kUxoEiSVFkGlM70cpjHgCJJUmW5zLgzK1bA2LHwyCP5\nuRvLl8Po0XlkqLGxBvVJkjSAuMy4UkaOhIMPhl/8okeHjxqV72q8cGF1y5IkaagwoHTFYR5Jkgpj\nQOnK4YdDS0veIrYHDCiSJFWOAaUrY8bA3nvDLbf06HADiiRJlWNAWZdeDPMYUCRJqhwDyrpMn54n\nyq5a1e2hBhRJkirHgLIu22yTl+f89rfdHrrttrB4Mbz+evXLkiRpsDOgdKeHwzyNjTBpEjz+eA1q\nkiRpkDOgdKc9oPRg4ziHeSRJqgwDSnf22gtefbVHNw80oEiSVBkGlO704uaBBhRJkirDgNIT06fD\nddd1e5gBRZKkyjCg9ERzM8ydC0uXrvMwA4okSZVhQOmJkSPhkEPg+uvXedjWW+cMs3x5jeqSJGmQ\nMqD0VA+WGw8fnkPKY4/VqCZJkgYpA0pPHXYY3HlntzcPdJhHkqT+M6D01JgxsM8+cPPN6zzMgCJJ\nUv91G1Ai4qKIWBoRD5S1jYmImyPikYi4KSJGl302MyIWRMS8iDi4rH2viHggIuZHxPmVv5Qa6MFq\nHgOKJEn915MelEuAQzq0zQBuTSntCNwOzASIiJ2Bo4CdgMOACyIiSud8DzgxpdQENEVEx++sfz24\neaABRZKk/us2oKSU7gZe6NA8Hbi09PpS4MjS62nAlSml1pTSQmABMCUixgEbpZTuLR33w7JzBo6t\nt4YJE+Cee7o8pKnJgCJJUn/1dQ7KlimlpQAppaeBLUvtE4Any45bXGqbACwqa19Uaht4ulnNM2kS\nPPdct3NpJUnSOlRqkmz3d9IbLNq3ve/i5oENDbDddvDoozWuS5KkQWR4H89bGhFjU0pLS8M3z5Ta\nFwOTyo6bWGrrqr1Ls2bNWv26ubmZ5ubmPpZaYXvtlXdimzcPdt6500Pa56G85S01rk2SpDrQ0tJC\nS0tLv74jUhc9AWsdFLENcH1KabfS+3OA51NK50TEGcCYlNKM0iTZy4Cp5CGcW4DJKaUUEXOA04B7\ngV8C304p3djFz0s9qaswp56ax3JmzOj049NPh003hZkza1yXJEl1KCJIKUX3R67Rk2XGlwP3kFfe\nPBERxwNnAwdFxCPAgaX3pJTmArOBucANwCllSeNU4CJgPrCgq3AyIHQzD8WVPJIk9U+PelBqre57\nUF5/HcaOzcM848a94eM77oCvfAXuuquA2iRJqjNV6UFRJ0aMWOfNA+1BkSSpfwwofdW+mqcT48fD\nSy/Biy/WuCZJkgYJA0pfHX54vnngyy+/4aOGBth+e5caS5LUVwaUvtpkE5g6FW65pdOPHeaRJKnv\nDCj9sY7VPAYUSZL6zoDSH9OmwS9/Ca2tb/jIgCJJUt8ZUPpj661h4sRObx5oQJEkqe8MKP3VxWoe\nA4okSX1nQOmv9nkoHTaWGzcu37LnH/8oqC5JkgYwA0p/7bln3ll23ry1miNghx3sRZEkqS8MKP0V\n4TCPJEkVZkCphC6WGxtQJEnqGwNKJRxwADzyCCxZslazAUWSpL4xoFRCFzcPbGoyoEiS1BcGlErp\nZJhn8mSYP/8NC3wkSVI3ItXhb8+ISPVY1zr94x+w1Vbw1FOw4YZA
DiZjxuSbBm6+ecH1SZJUkIgg\npRS9OccelEppv3ngzTevbopwHookSX1hQKmkLoZ5DCiSJPWOAaWSOrl5oAFFkqTeM6BU0lZbwaRJ\n8JvfrG4yoEiS1HsGlErrMMxjQJEkqfcMKJU2fTpcd93qtcXtAWWgLUqSJKlIBpRK22MPWLkS5s4F\nYNNNobERnnmm4LokSRpADCiV1snNAx3mkSSpdwwo1eA8FEmS+sWAUg3775/3uH/qKcCAIklSbxlQ\nqmHECDj00NU3DzSgSJLUOwaUamlfzYMBRZKk3upXQImIz0bEQxHxQERcFhEjImJMRNwcEY9ExE0R\nMbrs+JkRsSAi5kXEwf0vv44ddhjcdRe8/DKTJ+cbBrrUWJKknulzQImI8cC/AHullHYHhgMfBGYA\nt6aUdgRuB2aWjt8ZOArYCTgMuCAienVnwwFl9Gh429vgppsYPRrWXx+WLCm6KEmSBob+DvEMAzaI\niOHAesBiYDpwaenzS4EjS6+nAVemlFpTSguBBcCUfv78+la2msdhHkmSeq7PASWl9BRwHvAEOZgs\nSyndCoxNKS0tHfM0sGXplAnAk2VfsbjUNni9+92rbx5oQJEkqef6M8SzCbm3ZGtgPLkn5UNAx5kW\nQ3fmxVZbwdZbw91309RkQJEkqaeG9+PcfwYeTyk9DxAR1wBvB5ZGxNiU0tKIGAe0b/K+GJhUdv7E\nUlunZs2atfp1c3Mzzc3N/Si1QKXVPJP3beaKK4ouRpKk6mtpaaGlpaVf3xGpj0tLImIKcBGwD7AC\nuAS4F9gKeD6ldE5EnAGMSSnNKE2SvQyYSh7auQWYnDopICI6ax6Y7r8f3vc+7r/qUT5yXPDgg0UX\nJElSbUUEKaVeLYzpcw9KSun3EXEV8CdgZen5QmAjYHZEnAD8jbxyh5TS3IiYDcwtHX/K4Ekh6/CW\nt8CqVTStfJhHH92VtjZocPcZSZLWqc89KNU0qHpQAE47DcaOZfx3/5U5c/LUFEmShoq+9KD4d/la\nKN3d2JU8kiT1jAGlFg44ABYsYO/xTxlQJEnqAQNKLTQ2wmGHcdCK6w0okiT1gAGlVqZPZ4+F1xpQ\nJEnqASfJ1sqLL7Jq/ESmjF/MH+dvVHQ1kiTVjJNk69nGG8Pb9qXprzexalXRxUiSVN8MKDU07D3T\neP+Ia3niiaIrkSSpvhlQamnaNP759Rt4dN7KoiuRJKmuGVBqadIk/rHJNrx802+KrkSSpLpmQKmx\nRXtNZ8yvry26DEmS6poBpcZeP2w6b37kWhhsq5QkSaogA0qNjT1od1pXtsFDDxVdiiRJdcuAUmPb\nbR9cm6ax6hqHeSRJ6ooBpcZGjYLfbDadlVcZUCRJ6ooBpQAv7LY/DX99DBYvLroUSZLqkgGlANvt\n2MjjOx4G119fdCmSJNUlA0oBJk+GezafDtc6zCNJUmcMKAWYPBl+0Xoo3H03vPRS0eVIklR3DCgF\nmDwZ/vzXjeHtb4cbbyy6HEmS6o4BpQDbbguLFkHrEQ7zSJLUGQNKARobYdIkWLj7NPjVr2ClNw+U\nJKmcAaUgkyfDvJcm5u6Uu+8uuhxJkuqKAaUgkyfDggXAdId5JEnqyIBSkDcEFG8eKEnSagaUgqwO\nKLvtlsPJgw8WXZIkSXXDgFKQ1QElwmEeSZI6MKAUZOutYelSWL6cHFCuu67okiRJqhsGlIIMH55D\nymOPAe94Bzz+uDcPlCSppF8BJSJGR8RPI2JeRDwcEVMjYkxE3BwRj0TETRExuuz4mRGxoHT8wf0v\nf2BbPczT2AiHHWYviiRJJf3tQfkWcENKaSfgLcBfgBnArSmlHYHbgZkAEbEzcBSwE3AYcEFERD9/\n/oC2OqCA81AkSSrT54ASERsD70gpXQKQUmpNKS0DpgOXlg67FDiy9HoacGXpuIXAAmBKX3/+YLBW\nQDnkEPjDH+B3vyu0JkmS6kF/
elC2BZ6NiEsi4r6IuDAi1gfGppSWAqSUnga2LB0/AXiy7PzFpbYh\na62AsvHG8IMfwAc+kGfPSpI0hPUnoAwH9gK+m1LaC3iFPLzTcccxdyDrwloBBeDII+G44+CYY6C1\ntbC6JEkq2vB+nLsIeDKl9IfS+5+RA8rSiBibUloaEeOAZ0qfLwYmlZ0/sdTWqVmzZq1+3dzcTHNz\ncz9KrU+TJsFzz8Grr8L665cav/pVOOIImDEDvvnNQuuTJKkvWlpaaGlp6dd3ROrHFusRcSfw8ZTS\n/Ig4E2j/Nft8SumciDgDGJNSmlGaJHsZMJU8tHMLMDl1UkBEdNY8KO2yC1xxBey+e1nj88/D3nvD\n178ORx9dWG2SJFVCRJBS6tXCmP70oACcBlwWEY3A48DxwDBgdkScAPyNvHKHlNLciJgNzAVWAqcM\nmRSyDu3DPGsFlE03hauvhoMOyglm110Lq0+SpCL0qwelWoZSD8rpp8Nmm+URnTf48Y/zkM+998Im\nm9S8NkmSKqEvPSjuJFuwN0yULffhD8Ohh8JHPgJtbTWtS5KkIhlQCrbOgAJw3nnwwgvwb/9Ws5ok\nSSqaAaVgkyfD/PnrOGDECPjpT+HCC+GGG2pWlyRJRXIOSsHa2mCjjWDJkrxXW5d+8xt473vhnntg\n++1rVp8kSf3lHJQBqKEh541HH+3mwP32g698Bd7zHnjllZrUJklSUQwodaDbeSjtTjkF9twTPvEJ\nGCI9TJKkocmAUgd6HFAi4Pvfh7lz4dvfrnpdkiQVxYBSB3ocUADWWy9v4va1r8Gdd1a1LkmSimJA\nqQO9CigA224LP/oRfPCDsLjL2xlJkjRgGVDqQK8DCsDBB8OnPgXvfz+sWFGVuiRJKorLjOtASnmJ\n8ZNP9nJH+7Y2eN/74E1vggsuqFp9kiT1h8uMB6gI2GGHPvSiNDTApZfCbbfBJZdUpTZJkopgQKkT\nfRrmgdz1cs018MUvwh//WPG6JEkqggGlTvQ5oADsvHNefvy+98Gzz1a0LkmSimBAqRP9CiiQw8kx\nx+RHa2vF6pIkqQgGlDrR74AC+Y7HEfDlL1ekJkmSimJAqRNNTfCXv8ALL/TjS4YPhyuugCuvhJ/9\nrGK1SZJUawaUOrHFFnDSSfDOd8Izz/TjizbfHK66Cj75SZg3r2L1SZJUSwaUOvLNb8KRR8I73pH3\nROmzvfeGc8/Ndz5+8cWK1SdJUq24UVsd+n//L98L8JZb8tyUPjv5ZHj66Tzc02AWlSQVw43aBonP\nfS7Pc21uhgce6McXnX9+DijnnFOp0iRJqonhRRegzp10Emy0ERx0EFx3HUyd2ocvGTkyz0fZZx94\n61vz/XskSRoA7EGpY0cfDRdfDO9+N9xxRx+/ZMKEvKrnuONg4cJKlidJUtUYUOrcEUfA7Nk5rFx/\nfR+/ZP/9YeZMeO974bXXKlqfJEnV4CTZAeL3v4dp0/IE2mOP7cMXpAQf/nDeK+V//idv6CZJUg04\nSXYQmzIFbr013xPwwgv78AUR+cQ//Qm+972K1ydJUiU5SXYA2XVXaGnJE2eXLYPTT+/lF2ywQb7z\n8dvfDnvskZ8lSapD9qAMMDvsAHfdlSfPfvnLeeSmV7bfHi65BI46CpYsqUqNkiT1l3NQBqi//x0O\nOQT+6Z/ydie93oftrLPyTnC33w6NjVWpUZIkKGgOSkQ0RMR9EXFd6f2YiLg5Ih6JiJsiYnTZsTMj\nYkFEzIsIN+Xohy22yNnivvvgxBOhtbWXX/DlL8Mmm8AXvlCV+iRJ6o9KDPF8Gphb9n4GcGtKaUfg\ndmAmQETsDBwF7AQcBlwQ4VKS/thkE7jpJli8GI45Blas6MXJDQ3wox/BDTfAj39ctRolSeqLfgWU\niJgIHA78oKx5OnBp6fWlwJGl19OAK1NKrSmlhcACYEp/fr7yvNfrr4e2trwM+ZVXenHyJpvkSb
Of\n/Szcf3/VapQkqbf624PyH8DpQPmEkbEppaUAKaWngS1L7ROA8nv0Li61qZ9GjsybuY0bl+elLFvW\ni5N33RW+8528idvzz1etRkmSeqPPy4wj4ghgaUrp/ohoXsehfZrtOmvWrNWvm5ubaW5e14/Q8OF5\ncc5pp8G73gU33pjnqfTI0UfDnDl5MsvVV7uJmySpX1paWmhpaenXd/R5FU9EfA34MNAKrAdsBFwD\n7A00p5SWRsQ44I6U0k4RMQNIKaVzSuffCJyZUvpdJ9/tKp4+SinPf7366ryx24Se9lGtWJH3RTnp\nJDj55KrWKEkaWvqyiqciy4wj4gDg8ymlaRFxLvBcSumciDgDGJNSmlGaJHsZMJU8tHMLMLmzJGJA\n6b9zz4Xvfz+vJN5++x6eNH8+7LdfvjPhrrtWtT5J0tDRl4BSjZ1kzwZmR8QJwN/IK3dIKc2NiNnk\nFT8rgVNMIdXzxS/CxhvDAQfklT677NKDk5qa4BvfyEuC7r0X1luv6nVKktQZN2ob5C67DD7/+bzS\nZ599enBCSvluhJtuCt/9btXrkyQNft4sUG/woQ/lewQecQTceWcPTojIY0M33AA//3nV65MkqTP2\noAwRt92WR24uvRQOP7wHJ/z2t3DkkfDHP8LEiVWvT5I0eNmDoi4deCBcdx0cf3zeM6Vb++6b1yx/\n5COwalXV65MkqZwBZQjZd1+4+Wb4zGfgoot6cMKMGfn57LOrWpckSR05xDMEzZ8PBx2Ud7j/zGe6\nOXjRInjrW/N8lH33rUl9kqTBxSEe9UhTE9x1F1xwAXz1q3nhTpcmTsyzbI89tpd76EuS1Hf2oAxh\nS5fCwQfn+SnnndfNDvennprv1XP55W6FL0nqFXtQ1Ctjx0JLS16w8/GPdzMX9pvfhIceysuAJEmq\nMntQxMsvw7vfnXeb/c531nHgQw/BO98Jv/lNHieSJKkH7EFRn2y4YZ4De9tteY+2Lu26K5x1Vt5Q\nZcWKmtUnSRp67EHRao8+mu8V+JOfQHNzFwelBO99L2y3XZ64IklSNwq7m3GlGVCKc9tteXv8e+7J\nGaRTzz0He+wB//3fcOihNa1PkjTwOMSjfjvwQPjKV/KclBdf7OKgzTaDH/0ITjghLwWSJKnC7EFR\np045BZ54Aq69FoYN6+KgL38Z/vCHfGPBBrOuJKlz9qCoYr71LXj1VZg5cx0HnXlm7mY5//ya1SVJ\nGhrsQVGXnnsOpk7NQz7HHdfFQQsXwpQp8Ktf5S3xJUnqwB4UVdRmm+U7IH/hC3kzt05tsw3853/C\nBz+YN1TImzyDAAARXklEQVSRJKkC7EFRt264AU46CebMga226uKgE07IS5AvuaSmtUmS6p89KKqK\nww+Hz30Opk+HV17p4qBvfzuvTb7yyprWJkkanOxBUY+kBMcfn0dxZs/uYtHOffflfVF+9zvYdtua\n1yhJqk/2oKhqIuC//gueeirvdt+pvfaCGTPg2GNh5cqa1idJGlwMKOqxkSPhmmvyNJPZs7s46DOf\ngU02ga9+taa1SZIGF4d41Gv33w8HHQQ33tjFyuKlS2HPPeGyy/LdjyVJQ5pDPKqJPfbIdz1+z3tg\nyZJODhg7NnezHHccPPtszeuTJA189qCoz/7v/4Vf/hJaWmDUqE4O+MIXYMEC+PnP8yQWSdKQ5N2M\nVVMp5f3ZGhvhhz/sJIO8/jrsuy+ceGK+uY8kaUgyoKjmXn0V9t8fPvABOOOMTg6YPx/22w9uvx12\n263m9UmSiuccFNXc+uvnEZxvfxuuv76TA5qa4BvfyF0tr71W8/okSQNTnwNKREyMiNsj4uGIeDAi\nTiu1j4mImyPikYi4KSJGl50zMyIWRMS8iDi4Eheg4k2cCFdfnUdyHnqokwM++tHce/L5z9e8NknS\nwNTnIZ6IGAeMSyndHxEbAn8EpgPHA8+llM6NiDOAMSmlGR
GxM3AZsA8wEbgVmNzZWI5DPAPTZZfB\n//k/eSPZLbbo8OGyZXnp8Xnn5eU/kqQho6ZDPCmlp1NK95devwzMIweP6cClpcMuBY4svZ4GXJlS\nak0pLQQWAFP6+vNVfz70ITj6aHj/+/P82LWMHg2XXw6f/CQsWlRIfZKkgaMic1AiYhtgD2AOMDal\ntBRyiAG2LB02AXiy7LTFpTYNIv/+7zmLfOpTeZXPWt72Nvj0p+HDH4ZVqwqpT5I0MPQ7oJSGd64C\nPl3qSen4a8mxmiGkoSEP9fz2t/Cd73RywBln5PXIX/96zWuTJA0cw/tzckQMJ4eTH6WUri01L42I\nsSmlpaV5Ks+U2hcDk8pOn1hq69SsWbNWv25ubqa5ubk/paqGNtoIrrsO3v522HFHOLh8OvSwYfDj\nH+cbC77rXfkgSdKg0tLSQktLS7++o1/7oETED4FnU0qfK2s7B3g+pXROF5Nkp5KHdm7BSbKD2q9/\nneej3HVXDiprufbafGPBP/0p31xQkjRo1XSjtojYD/g18CB5GCcBXwJ+D8wm95b8DTgqpfSP0jkz\ngROBleQhoZu7+G4DyiDxgx/kbVDmzIExYzp8eOqp8NxzcMUVboUvSYOYO8mqLn3mMzB3LtxwAwwv\nH1R87TWYMgU++1k44YTC6pMkVZcBRXWptRWOOAJ22gnOP7/Dhw8/DM3NcPfdnYwDSZIGA7e6V10a\nPhx+8hP41a/gv/+7w4e77AL/9m/wznfCWWe5R4okCTCgqEY22STfq+df/xXuvLPDh//7f+fxn6VL\nYffd4d3vzge3thZSqySpeA7xqKZuuQU+8pG8T8q223ZywCuvwOzZcOGF8OSTeW7KiSfC1lvXvFZJ\nUmU4xKO6d9BBuRdl2jR46aVODthgAzj++JxgfvUr+Mc/8p4phx8O11wDK1fWvGZJUu3Zg6KaSymP\n6jz9dM4cw4Z1c8Jrr8FVV+Velcceg499DE46CbbbrhblSpL6yR4UDQgReRv8Zcvgy1/uwQnrrZfH\nhe66C269FZYvh6lT8xa1P/1pJ3cmlCQNdPagqDDPPpu3QTnrrHz/wF5ZvhyuvjovC5o7d02vyuTJ\n1ShVktQP9qBoQNl883zPns9+Nu802yujRsGxx8Idd+Q99dva4J/+Kd/f58orYcWKqtQsSaoNe1BU\nuF/8Aj760Twv5YwzYPToPn7RihX5Hj8XXgh//jMcdxx8/OPw5jdXtF5JUu/Yg6IB6X/9r5wnnn4a\nmprgW9/qYwfIyJFw1FF5nsqcOTBiRN6ldv/98x2UX3ut0qVLkqrEHhTVlQcfhBkzYN48+NrXct5o\n6E+MXrkyb/p24YXwhz/Ahz6Ue1V23bViNUuS1s178WjQuOMO+OIX8+tzz8074ffbwoVw0UVw8cV5\n47dPfCInoPXXr8CXS5K6YkDRoNLWllcRf+lLeRrJ2WfDbrtV4ItbW+GXv8wrgO65J4eUXXbJs3Y3\n3xw222zNa8OLJPWbAUWD0uuvw/e/D//+7/muyGedBRMnVujLn3wSrrgCnngir3tufzz3HPz973l8\nqTywdAwwHd9vtpmhRpI6MKBoUFu2DM45B/7rv/LozBln5JsQVk1K8OqrawJLeXjpGGbK3zc09CzM\ntL9/05vyLZ8laZAyoGhIWLQIzjwzz3390pfg5JPzAp66UB5qehJmnn02dxG95z1w9NFwwAGGFUmD\njgFFQ8pDD+UVP3Pn5uGfo4/u54qfoixcmCfbzJ6dh5re9748L+Yd7+jBjYokqf4ZUDQktbTkFT9t\nbXnFz7veVXRF/fDYYzms/OQneWOY978/h5X99hug6UuSDCgawlLKv9dnzoQdd8xzVSqy4qdI8+fn\nXpXZs/PQ0Ac+kLuJpk41rEgaUAwoGvLKV/wcfnhe8TNpUtFVVcDcuWt6Vl5+OfeqHHUU7LNPvj20\nJNUxA4pUsmxZHu75/v
fzxrEzZlR5xU+tpAQPP5yDyk9+knfKPeqo3LOy556GFUl1yYAidbB4cV7x\nc911efjnlFPqaMVPf6WUb2I0e3YOKw0Na3pWdt/dsCKpbhhQpC48/HDuRXnooTz8c8wxg2waR0pw\n3305qMyeDaNGrQkr3ndIUsEMKFI37rwTTj8dVq3KQ0AHHlh0RVWQEtx775qwsvHGa4aB3vzmoquT\nNAQZUKQeSAmuuioP+UyenO/xs9NOuUclYs3zoNDWBnPm5KDy05/mnWuPPjoHlsmTi6srpXxPpIi8\n18ug+QcuqTMGFKkXXn89b5v/ta/lDV3b2vLvzfY/ehFrB5ZqPTc25nkxo0blR/vrztq6+3ydbSPa\nGPXH39B49U+In10F48fnpU4jR+bJtq2taz931tbdc0+PXbUqB5OU8j/4xsa8g25Pnit1zAYb5NsM\njB+fH296k/dRkqrEgCJVSPvvzUo8d3fMypWwYgUsX/7G587a1vVZT9pWroRRjas4sPHXHBi3s9FG\niQ1GN7LhmOFsNKaRjTcbzujNGtlk8+GM3ryRYaOqEBbKe03a2voXfPoanF5+GZYsgaeeyo8lS2C9\n9dYOLeXhpfz1qFHF/gGVBpgBEVAi4lDgfKABuCildE4nxxhQpCppa8u9R8uX59/RixfnmzqXPxYt\nys/PPANbbpn3kpk0Kd9Fuv11+2Ps2EGyI39K8MILawJLeXDp+H7DDdcdYsaPh3Hj6mvJWEq55+r1\n13P33ciRDq2pZuo+oEREAzAfOBB4CrgXOCal9JcOxw3JgNLS0kJzc3PRZRTCa28uuoxOrVyZfx+X\nh5aOj+efz7+by0NLxyCzxRadr5qq52vvUltbvujugszSpXmCcmchZuxYWh5+mObJk3Ng6OqxYsW6\nP+/Jo/w7GhpyD1Z7192oUbnXqONj/fU7b1/Xo7tzRo2ChoaB+e+8QobytfcloNT6tqlTgAUppb8B\nRMSVwHTgL+s8a4gYyn94vfbmosvoVGMjbLVVfnTl9dff2AvzyCNw221r3r/0Uv693LH35ZZbWliy\npLmi00uq3pvT0ACbb54fu+/e9XFtbXlyU8cQ89BDcNtttPzlLzTvsQeMGNH5Y9QoGD2668978xg5\ncs3QWrtVq3I32muvvfHx6qudt7c/nnuud8e/9loOSiNG0AI0jxnT+0lUlZiINXJkoZOy6/m/9XpU\n64AyAXiy7P0icmiRNECNGAHbbpsfXXnttTU9MO3PDzyQ74147bWVmXvb/mifeNyTObc9nczctwnQ\nDTQ0bEnEljQ07LH2ZxvCAyNm8beRs/L7Nmh4HWIlNLxW/cnZeW7UMNraNiClDfo2v2oEtA2HtGHP\n5lulVW0Ma13B/X86k3/s8hka21YwMi2nsW0FI9pKz2k5I1Yup3HFmrbGVcsZ0bac4atW0Ni2nMZV\nL9K4qvR+1XKGt79uzc/DVy1neGv783KGta5Y67khtQGQIkjRQIrSP5hoIHX2DywaoKHsuezzaGh/\nzp+t9dzZdz3zDFxzTY3+y+zEffcNqPHYWgcUSUPQeuvlVc0dVzbPmpUflbRqVc+DTk8mMVdisnRn\nbS++CO94R2UnZLf/rO4+72mQGTas58GnZ4FtPf7+8vrscfj4Lmtc1QatCV6p4D+T8mtftQpWvp5Y\ntbKNVSvbWPl6oq21jdaVua115drvV7V2/ty2as3xaVXbmrbWNoZFYsTwNhqHJxqHrXle+NoF3PP0\nyZX9A98Lv0oNDJx4Uvs5KG8DZqWUDi29nwGkjhNlI2LoTUCRJGkQq/dJssOAR8iTZJcAvwc+mFKa\nV7MiJElS3avpEE9KaVVEfAq4mTXLjA0nkiRpLXW5UZskSRra6up+rhFxaET8JSLmR8QZRddTKxEx\nMSJuj4iHI+LBiDit6JpqLSIaIuK+iLiu6FpqKSJGR8RPI2Je6d//1KJrqoWI+GxEPBQR
D0TEZREx\nouiaqiUiLoqIpRHxQFnbmIi4OSIeiYibImJ0kTVWSxfXfm7pz/v9EfGziNi4yBqrpbNrL/vs8xHR\nFhGbFlFbtXV17RHxL6V/9w9GxNndfU/dBJTSJm7fAQ4BdgE+GBFD5darrcDnUkq7APsCpw6ha2/3\naWBu0UUU4FvADSmlnYC3AIN+yDMixgP/AuyVUtqdPNR8TLFVVdUl5P+vlZsB3JpS2hG4HZhZ86pq\no7NrvxnYJaW0B7CAoXXtRMRE4CDgbzWvqHbecO0R0Qy8G9gtpbQb8M3uvqRuAgplm7illFYC7Zu4\nDXoppadTSveXXr9M/iU1odiqaqf0H+zhwA+KrqWWSn9zfEdK6RKAlFJrSunFgsuqlWHABhExHFif\nvLP0oJRSuht4oUPzdODS0utLgSNrWlSNdHbtKaVbUyptRgJzgIk1L6wGuvj3DvAfwOk1Lqemurj2\nk4GzU0qtpWOe7e576imgdLaJ25D5Jd0uIrYB9gB+V2wlNdX+H+xQmxC1LfBsRFxSGt66MCLWK7qo\nakspPQWcBzwBLAb+kVK6tdiqam7LlNJSyH9BAbYsuJ6inAD8qugiaiUipgFPppQeLLqWAjQB+0fE\nnIi4IyL27u6EegooQ15EbAhcBXy61JMy6EXEEcDSUg9SlB5DxXBgL+C7KaW9gFfJXf+DWkRsQu5B\n2BoYD2wYEccWW1Xhhlo4JyL+FViZUrq86FpqofSXjy8BZ5Y3F1ROEYYDY1JKbwO+CMzu7oR6CiiL\ngfI7fkwstQ0Jpa7uq4AfpZSuLbqeGtoPmBYRjwNXAO+MiB8WXFOtLCL/beoPpfdXkQPLYPfPwOMp\npedTSquAq4G3F1xTrS2NiLEAETEOeKbgemoqIj5GHtYdSsF0e2Ab4M8R8Vfy77g/RsRQ6T17kvzf\nOimle4G2iNhsXSfUU0C5F9ghIrYuzeg/BhhKKzouBuamlL5VdCG1lFL6Ukppq5TSduR/57enlI4r\nuq5aKHXxPxkRTaWmAxkaE4WfAN4WEaMiIsjXPdgnB3fsHbwO+Fjp9UeBwfyXkrWuPSIOJQ/pTksp\nrSisqtpYfe0ppYdSSuNSStullLYl/wVlz5TSYA2nHf/M/xx4F0Dp/3mNKaXn1vUFdRNQSn+Tat/E\n7WHgyqGyiVtE7Ad8CHhXRPypNB/h0KLrUk2cBlwWEfeTV/F8reB6qi6l9Htyb9GfgD+T/yd2YaFF\nVVFEXA7cAzRFxBMRcTxwNnBQRLTvrN3tksuBqItr/09gQ+CW0v/rLii0yCrp4trLJQbpEE8X134x\nsF1EPAhcDnT7F1E3apMkSXWnbnpQJEmS2hlQJElS3TGgSJKkumNAkSRJdceAIkmS6o4BRZIk1R0D\niiRJqjsGFEmSVHf+P8R8bDsDIEQ7AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11a9c6208>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot([x+1 for x in range(15)], firstbycopies['docid'])\n",
    "plt.plot(xs[0:15], ys[0:15], color = 'r')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Basically, the change we need to make is represented by the difference between the red and blue lines. We need to subtract about 500 titles that are only represented once, and add 500 titles that are represented by 2 -> copies."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 126\n",
      "3 71\n",
      "4 42\n",
      "5 67\n",
      "6 55\n",
      "7 3\n",
      "8 25\n",
      "9 25\n",
      "10 9\n",
      "11 14\n",
      "12 7\n",
      "13 3\n",
      "14 6\n",
      "15 5\n",
      "16 4\n",
      "17 0\n",
      "18 3\n",
      "19 2\n",
      "20 4\n",
      "21 3\n",
      "22 0\n",
      "23 1\n",
      "24 3\n",
      "25 2\n",
      "26 3\n",
      "27 0\n",
      "28 2\n",
      "29 0\n",
      "\n",
      "remove: 492\n",
      "add:  485\n"
     ]
    }
   ],
   "source": [
    "needed = dict()\n",
    "for i in range(2, 30):\n",
    "    if i in firstbycopies.index:\n",
    "        needed[i] = ys[i-1] - firstbycopies.loc[i, 'docid']\n",
    "    else:\n",
    "        needed[i] = ys[i-1]\n",
    "\n",
    "add = 0\n",
    "for i in range(2, 30):\n",
    "    print(i, needed[i])\n",
    "    add += needed[i]\n",
    "\n",
    "print()   \n",
    "print('remove:',firstbycopies.loc[1, 'docid'] - ys[0])\n",
    "print('add: ', add)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "However, here's the rub: we want these additions and deletions to be evenly distributed across time, and we want the sampling to reflect local conditions in each decade. To achieve that, we're going to need to do everything we did above *in each decade.*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "492 // 21"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "So, add 23 vols in each decade."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "error\n"
     ]
    }
   ],
   "source": [
    "alreadysampled = set(first.docid)\n",
    "selected = []\n",
    "\n",
    "for decade_floor in range(1800, 2010, 10):\n",
    "    dmeta = meta.loc[(meta.inferreddate >= decade_floor) & (meta.inferreddate < (decade_floor + 10)), : ]\n",
    "    grouped = dmeta.groupby('copiesin25yrs')\n",
    "    bycopies = grouped.agg({'docid': 'count'})\n",
    "    \n",
    "    # vols that can be selected\n",
    "    toselect = dict()\n",
    "    for numcopies, df in grouped:\n",
    "        toselect[numcopies] = []\n",
    "        for d in df.docid:\n",
    "            if d not in alreadysampled:\n",
    "                toselect[numcopies].append(d)\n",
    "        random.shuffle(toselect[numcopies])\n",
    "               \n",
    "    totalcopies = []\n",
    "    binlabels = []\n",
    "    for idx, row in bycopies.iterrows():\n",
    "        if idx == 1:\n",
    "            continue\n",
    "        else:\n",
    "            binlabels.append(idx)\n",
    "            \n",
    "        copies = int(idx) * row['docid']\n",
    "        totalcopies.append(copies)\n",
    "    totalprob = np.array(totalcopies) / np.sum(totalcopies)\n",
    "\n",
    "    cumulativeprob = []\n",
    "    cumulate = 0\n",
    "    \n",
    "    for prob in totalprob:\n",
    "        cumulate += prob\n",
    "        cumulativeprob.append(cumulate)\n",
    "        \n",
    "    copycounts = []\n",
    "    for i in range(0, 23):\n",
    "        prob = random.uniform(0, 1)\n",
    "        for label, threshold in zip(binlabels, cumulativeprob):\n",
    "            if threshold > prob:\n",
    "                copycounts.append(label)\n",
    "                break\n",
    "\n",
    "    for acount in copycounts:\n",
    "        if len(toselect[acount]) > 0:\n",
    "            chosen = toselect[acount].pop()\n",
    "            selected.append(chosen)\n",
    "        else:\n",
    "            print('error')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "482"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(selected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "thechosen = meta.loc[meta.docid.isin(selected), : ]\n",
    "thechosen.to_csv('oft_reprinted.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x107494828>"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAi0AAAFwCAYAAAB0EUvGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztvXtwZNd93/k9FPtFPAYza0SyKROgSI6Gybw43uHSjnaD\nUTgSraRsl7ll7riUxFzEEs1MyEoURzQrKbKCnbWYUkqRrJ3FQIGN2CFmJrGTzYtyOxMD2WKtZSAU\npVHcHJG0BEiibHVXSjuxHIQcyb/80d2D7sa93fdxzrnn3Pv9VHWhu3H73vP+fc/vvJSIgBBCCCHE\ndW7JOgCEEEIIIVGgaCGEEEKIF1C0EEIIIcQLKFoIIYQQ4gUULYQQQgjxAooWQgghhHiBFtGilFpW\nSn1LKXU15P8/rZT6Yuf1olLqiI7nEkIIIaQ46PK0/AqA9w/5/1cA/C8icgzA/wHgM5qeSwghhJCC\ncKuOm4jIi0qpmSH//1zPx88BuF3HcwkhhBBSHLKY0/JXAXw2g+cSQgghxGO0eFqiopQ6BeBRAO+x\n+VxCCCGE+I810aKUOgpgCcBDIvLtIdfxMCRCCCEkR4iI0nEfncNDqvPa+w+l7gDwGwD+koj8/qgb\niQhfGl7PPPNM5mHI04vpyfR09cW0ZHq6/NKJFk+LUmoVwByA/0Ep9TUAzwAoAxARWQLwdwEcAHBe\nKaUA3BCR+3U8mxBCCCHFQNfqoZ8e8f+fBfCzOp5FCCGEkGLCHXFzzNzcXNZByBVMT70wPfXBtNQL\n09NdlO7xprQopcS1MBFCCCEkGUopiIMTcQkhhBBCjEHRQgghhBAvoGghhBBCiBdQtBBCCCHECyha\nCCGEEOIFFC2EEEII8QKKFkIIIYR4AUULIYQQQryAooUQQgghXkDRQgghhBAvoGghhBBCiBdQtBBC\nCCHECyhaCCGEEOIFFC2EEEII8QKKFkIIIYR4AUULIYQQQryAooUQQgghXkDRQgghhBAvoGghhBBC\niBdQtBBCCCHECyhaCCGEEOIFFC2EEEII8QKKFkIIIYR4AUULIYQQQryAooUQQgghXkDRQgghhBAv\noGghhBBCiBdQtBBCCCHECyhaCCGEEOIFFC2EEEII8QKKFkIIIYR4AUULIYQQQryAooUQQgghXkDR\nQgghhBAvoGghhBBCiBdQtBBCCCHECyhaCCGEEOIFWkSLUmpZKfUtpdTVIdd8Sin1mlLqC0qp4zqe\nSwghhJDioMvT8isA3h/2T6XUjwK4S0TuAfBhAIuanksIIYSQgqBFtIjIiwC+PeSSHwfwq51rfxfA\nPqXU23U8mxSTVquFzc1NtFqtrIOSO4qctkWOOyE+YGtOy+0Avt7z+Y3Od4TE5uLFy5iZOYTTpx/D\nzMwhXLx42dqzkxg1nwxhlmmbNUWOOyHeICJaXgBmAFwN+d+/BvAjPZ+vADgRcq0QEkaz2ZRa7YAA\nXxRABPii1GoHpNlsGn/26uolqdUOyL59J6RWOyCrq5eM/CYrskzbrCly3E3QbDZlY2OD6UdERKRj\n17VojVstaaM3APxgz+d3dr4L5Nlnn735fm5uDnNzc6bCRTxja2sL5fIsdnaOdr45ilJpBltbW5ie\nnjb23FdeeQWPPvoY3nzzP3SefRXz86fw4IPvDX1uq9XC/Pzj2NlZi/ybLMkqbXXQarWwtbWF2dnZ\nRGH1Oe6uceHCZ/Dkk38b5fKd+O53t7G8fB5nzjySdbCIRdbX17G+vm7m5rrUD4BZAF8K+d8HAPzb\nzvsHAHxuyH20qzySH7LoEa+uXpJKZVKAg51ntl+Tk/fJxsZG6O82NjZk374TsX6TJb56G3R4s3yN\nu2ssLi4JUGM6kj6g0dOiS7CsAvgmgDcBfA3Ao2ivEvpQzzWfBvA6gC8iZGhIKFpIBLpGanLyPuND\nLrvGbE2AeEbNNUMYxWVvM211oDONXY
i7z8MqzWazI+6P9Qn1iYnjzgp1YgfnRIvOF0UL6TKsAbfV\nuPd7Sy51hMs9UqlMxZrTkrUIiOON8Mlw6vZmZRl3n+Y/BbGxsSETE0f2iPtKZcqLskTMQdFCck+3\nAZ+YOCKVyqQsLi4ZfV6Ysdrbk1+TSmVSGo1G5Hs2Go1MRUBWHh8bAiBN3FwSZ6555ZKwG4fnOsLl\nqAA143WXuA9FC3EWHYZgb+N3zGjjN6qHm8Rb4lKvOYu5NTbj73v+iPg3/ymMbrqOjx+20tkgfkDR\nQpxElyGw6WaO2sONI8Zc6zXbCE9v+mQRf5/zx9UwJcWkB8sl7xiJjk7RwgMTiRZ6l/dev/4SdnbW\nMD//eKIN1cbHx/Hf/ttX0V4lv7sEtVyexdbWlsZQ7y517X1Od6lrL9PT0zh58mSk5a9R72mL6elp\nLC+fR612CpOTJ1CrncLy8nltS3kHN2W7cOEzkeKvc9M9n/Ony9NPf8RYHtkkTl7EgZv/EQD0tBA9\n6HJvd7011eodVpZOmujhutprDuqlpu25BsW1Wp0aGf8sh2ei5I/NHn1vWlSrU7KwcC7zsuIartYp\nEg1weIi4ho5GZe89/q4ANRkfP2bUsJlY4ePKqqFh6JjsHCZWFxbOhcY/awPUbDaHhs+moMo6LWyR\nVgTmZc5PUaFoIU6S1lAHNUzj44dlZWXFy5UuLo+/65rsPMzohsU/SwM0yqthW0QUwRhnufnfsFWB\nrtbNPELRQpwlTWNQlF6nC+ic7BxXrGa5BHvUc22LCN/L/Kj6nuXmf2FiybWVY0WAooXkFh+GVZLi\nUu9O9+6lceOWRT5HESRZiAhfy3wU45/V5n9h+dhoNLwWib5C0UJyjUvGXRerq5ekWp2SsbF3S7Ua\nbTfdpERNP53nxCTJM9v5HFWQZCEifCvzcbYKyEIkhImllZWVWCLKt3xxFYoWQjyi2WxKqTQhwH4B\nTgiwX0qlcSMNYVzX9+LiklQqUzIxcTyxgfbJ3R5VkJgyVnkxgnE8KFmJwLSeFp/KtetQtBBj5KVR\nTYOuNOje5/LlywLc1tdQArdJvV7XGgbdkxVNPjPuM3SWyazKeJ6MYNx8zyLNw8RSFBHl+1wj16Bo\nIakIa0Dy1KgmRVca9N6nXB4X4O6+XilwV6hoSRqGLFaimH5mXspkEiPoegfCh7k4SVcPFWFVl00o\nWkhiwoyAzZ6Fq42xrjTYe5+1PXNHyuV9gfdNE4Yseocmnxnl3jrLkslyGdcI+iLWXKvLOr2k9LTo\ng6KFJGJYRbTVs3C5MdaVBkH3qVZnpVKZkrGxo0PjnTYMWfR+TT1zVFroLEumy2UcI0iDmQzdeeiD\nJ8kXKFpIIoYZAVtzE1xujM15WnYnAY7qBeoIQ9reZtDvo+zHYWJzvrC00FmWbJXLqEaQQxP9RBni\nMZWHrnmSfIWihSRiVMU23bPwoTHWlQZp7pNlDy+ot5qldywsLdKUpUFDZLNcRjGCrot7m0TdIG5h\n4ZzzbUuRoWghiRllEE32LHxpjHWvHkra+7fdwwvKnyiHH9oIV5DnJ0m4goygi+WyCEMTUbx3YR5L\nF8spCYeihaQiS5dnERpjXwnyOIyNHZSxsf5dc13pweo8PsDFcpnnoYk0u+mGbRA37BBMki06RYtq\n388dlFLiWpiIXlqtFra2tjA7O4vp6emsg0M6tFotzMwcws7OGoCjAK6iWv1zUOqWvu9qtVPY3r7m\nRN51y9L4+Di+853vDC1Tm5ubOH36MVy//tLN7yYnT+DKlQs4efIky6UlgspZUJkKu+6ll17ED/3Q\newJ/D4B56CBKKYiI0nIzXepH1wv0tGRKnnt3ushzGgV5HFz0QvQSdc6Ni8NARUTHbrqul0nSDzg8\nREzg8nJkV3A9jXQIqiSrh7IirhApmrFzMd/i5lnSDeKIO1C0EO340gvNsqFyPY1cF1RhpMnTJCt/\nim
Dsms3mzTkeLpaHoonHokPRQrTj+nJkFxrhtGnElVl7SSu0fI23Sbonig+ed+VauhRBPJI2FC1E\nOy43/jYaYdP7Z5j2grguOoPQVebYa99lN02fl/aJ4v6UB5JfKFqIEVxs/G00wnEERZI0SmOco/ZG\nXRadYegUWkXrtYfFdzdNmwLYPfE7K3wMc9GgaCHaGKzwrjUAaRvhUSQx9nHTKKlxjuudcVF0DiMr\noeVaGY/LsHLRn6aXBNgvwF2xyo+Lc2DC8DHMRYSihWjBhwqfphGOgo1hlaTCKIlB980g2xZatsu8\n7vwI27W4Xq8HHsdRrU7JwsK5XHrqfAxzUaFoIanxqcInaYSjYisd4hpnH+eoJMWW0LJd5k0IpL3l\n4pIAt8nY2LG+Z9jyBtqmN15xw+yboM8TFC0kNb40Ul1MNjhd4zI+flgqlUlZXFzS/gyReHHwSVT6\ngs3l0abyr/++zY7n0f2TrnXU30ERuLi4FDnMPniV8wxFC0kNjWI/i4tLUqlMycSEO/NBfJuj4jpx\nynzaJfYmOwXdcjE2dlCAu7U94+zZJwSoCXCPADU5e/aJ1GHtDW8awRCWd13hMqyOsK3LHooWogUa\nxTYuN2p0aeslSpnXscR+WJnSkafNZlPq9bq2crsb3jUBNgRYy2yiexDDROCo9PTNq5xHKFqINmgU\nB1cobQjQZKOWY4aVeZ1L7Ied46RrmEJXx8OUYdd137TbBrjaKSkKFC2EaKTZbEqpNNGZH3BCgP1S\nKo2zUSsgupfY9wokk/NcdHhuzM/BSXffNAKNXuVsoWghRCPNZlPK5X19DWu5vI+iZQR59NKZXGLv\n+jCFKcMe576jylSa/+exvPoCRQshGnHNmPjQuEYZ5vAhHkGYWmLvwzBFWJ6lzcsov087dMYVQu5C\n0UJIAK4tT02CDw1vlPTyIR7DMCW4fBymsJGXaeugS3WY7IWihRSCOIZDVy8t7plCpnc7dbHhHeWZ\n8iUeUTGRz754oGzlZVpvp2veUtIPRQvJPXFEiK6G1aZICsKXhndUevsSjyiE5bNPwiMNe/OyKWNj\nB6Ver2t9Dj0t+cY50QLgIQDXALwK4KMB/58E8K8AfAHAlwD8zJB7mUgzYpCsPQ62jaQPKy1MM8wz\n5VM8hjFqQzNfh77i0D9JvTsx+W4j8Y7i7RzW1vg49FYUnBItAG4B8DqAGQCljjA5NHDNLwD4xc77\n7wPwnwHcGnI/M6lGjOCCx8G2kbSx26kPDW/eDUhQPrePepjyXpBFpb0dwLgA+yTNZntxnjeqTOVx\n8nfecU20PADgsz2fnxr0tnS++3Tn/Z0AXh1yPwNJRtIS1Bi45HGwaSRNiyQXG94kYTIZDxtpFJTP\nlcqkTEzcl4uhryjsCre6AMcyi3devHdFxTXR8jCApZ7PHwTwqYFrxgH8NoBvAvgvAH50yP1MpBlJ\nQVgPxzWPg01jnwdPQlRcWwlkMzyD+RznkD5bmBaH7fiuiY7N9pKSp3lSRcRH0fIwgH/QeX8XgK8A\nGA+5n4k0IwkZ1sMposehF9fDpwPXerhZhGcwn10SrCYFXDfeXaFWrc4KUJNa7bD1eLtWDkk8dIqW\nW5GeNwDc0fP5nZ3venkUwC92FMnvK6W+CuAQgP8YdMNnn3325vu5uTnMzc1pCCZJwtbWFsrlWezs\nHO18cxSl0gy2trZw8uRJLC+fx/z8KZRKM7hxYxvLy+cxPT3dd49Wq4WtrS3Mzs7u+d8wpqenY11v\nG9fDlzTdexmW/1nE3WR4wtJrMJ/PnHkEDz743tRpmzZ8rVYL8/OPY2dnrZMeVzE/fwoPPvje1GG6\nePEy5ucfR7k8i7fe2sInPvExnDhxHG+99RZef/113H///bj33nv1RCyEwfhGaWt0Po8kZ319Hevr\n62Zunlb1AHgbdifiltGeiHvvwDX/F4BnOu/fDuDrAA6E3M+E0CMJ
idLDSTt5juhHV7q71sM1FR7X\ny2lQ+EwNmbiwasr2UnPX89934NLwUDs8eAjAlwG8BuCpzncfBvChzvvvB1AHcLXzOjPkXmZSjSRi\ndfVSZ/XAbQLcJeXyvsgVWoeBKcIQjG50G3aXhkNMhMc1YTZIWPgajYaRcGe9asp2frie/3nAOdGi\n80XR4g79lbkpwPNSrU5Frsxpe4Ls/STDRA/cNfGoMzyuT/IcFj4TglLnqqkk+WQ7P1zP/zxA0UKs\nkLYyp+nBsPeTHKZdPFxPr6DwVatTUq/Xb06I1y0odayaStrpoKclf1C0ECvoqMxJe4Ls/aTDtSEd\n1+mmV3sYZFIWF5eyDlIfvflZKk1IubzPuAcyzaqptG2H7fLr+hYLvkPRQqyho/FIujEZez/pYKMa\nj8XFJalUpmRiwk2h12w2pV6vZ1ovopYpHZ0O2+U3zvM4dB0PnaJFte/nDkopcS1MRSftUsCkv+8u\nu+xd4njmzCOxn0/IKFqtFmZmDmFnZw1Ae/lwrXYK29vXMlv+GlRvNjc3cfr0Y7h+/aXuVRgbew/+\n+T//Jbzvfe/LJJxBuJieushz3EyhlIKIKB33ukXHTUi+mZ6exsmTJxNVyIsXL2Nm5hBOn34MMzOH\ncPHi5ci/PXPmEWxvX8OVKxewvX1Nq2BptVrY3NxEq9Uycj3xi+7+L20jBPTu/5IFYfVmdra9b0p7\nEeZlAO/GH//xn+AnfuJMrLplmu6+KrXaKUxOnkCtdkr7viqmCavzrpWVwqHLZaPrBQ4P5QZXh3ji\nunbpCs4/NsvqqGGIUWFZXb0k1eqU2DjAMC29cfVpuHJYnXe1XXMZcE4L8QEXJ9PGbXDYQBUHG5M/\nowjgKPWmXq/L2Fh2BxjGJY3wz2Juy6g6z4nu8aBoIV5gwuAnndTb/U1cIeWi8CLmsHP44PD6EHUX\nal/EdJqwZuHljFrnffIcZQ1FC/EGnT2SJA3Y4G/i7jfhk3EgbrOxsRF5g7Yo9caX3n5S4Z9V3Rv1\nXIqV+FC0EK/QUcmTNGCjzlCJ2tj7YhyI2ywuLglQiyWYR9UbHwxoUvExSuyYjHtYnef8tmRQtJDC\nkaS3Nuw3cRs8H4wDcZddw/2cAAcEOCpAzblN7JIQpW4k3bwtTOzYEA+D8aLXNTkULaRw6PS0sJGh\nCLNNv4BuCrAh4+OHvZ8bFUc8JClzQWInq3rN+W3JoWghhSRJb41DO3uhi9s+eRTQtuI0KHayEg95\nzENbULSQwpJ29VDRYcObHXkT0EUUD3nLQ1voFC3cxp+QArF3G3hgcvIErly5gJMnT2YYsmKQ9kgM\nl8hyO3vbR3z05huA3OShLXRu40/RQrwiT41+Fvhybgrz2Q/Sioc0+WyrjHTjWC63j1DgGWjx0Sla\nMh8OGnyBw0OFJ2w4h3Mx9OC6i5v57BdJh199yGcOp+oBHB4ieSWsV+OLhyAJWXgVXPJkDLreXchn\nl9Inj/hSnzmcqgee8kxySavVwvz849jZWcP16y9hZ2cN8/OP3zQgeTxZNc0p2GlIc3K3Tgbjf+HC\nZzLP56zypEhcuPAZ7OwcgOv1uf9UbQC4ihs3tm8KbJIBulw2ul7g8FBhGbUZnKtu2qTucZfjZIOg\n+FerU5mmSdHzxAbNZrNzSvV+L9LZ9eFUH4DG4SF6WkhqWq0WNjc30Wq1Ut1nWK9menoay8vnUaud\nwuTkCdRqp7C8fH6Pp0BXWKKSplfe7lXejt7eJvADzvU2TRHkPSuX78TTT39kZD7bDJOLHgCf2dra\nQqXyLgD/N4BTAE4A+GE8/fRHUuezifp/5swj2N6+hitXLmB7+xon4WaNLvWj6wV6WrxC92S6Ub2a\nYV4N2xP74pzaGxTmRqOx5ywaoCaNRiNWGHzdg2ZY+mUVL3pazNOfxk0BnpdqdSrSgYQu1X8SHXBz\nOeICphr4pBvI2TY2UTbXGtaQ
bmxsSK12Z+csmvsEOCDV6mzkzbny0Ei76Hp3MUx5I86BhN32oHvQ\naVB5p9h0G4oW4gQuncWRRViiHGEf7f9rAmwIsBa5oc1TI+2it0hXmFyMmytEOZCwVJqQWu2ATEwc\n2eOV7C3vLrVFZC86RQvntJDEuDSzPouwjJpnM2p+xO7vH8bk5IdRqz0caf5Gq9XCCy+8gFtvnQm9\nt0+4spKpFx1h4iqk4Qym8d768v24ceN72NlZwx/90TKAgwgr7y61RcQwutSPrhfoafGKtK50nT3R\nrNz6YXFIO+cliG4cR/U8STg2vB958oTZYm+aPS/A3bJ7Mvbw9OSwnruAw0PEJVzaEdM1d7zOhnRv\no/6cADWZmDjORjoituYBcbgiGb31pVqdknJ5X6zy7lr9J210ihbuiEsywZcdMXWga3fVoN05x8eP\n4NOf/lv4wAc+kLt0043NMudK+fZxZ9/eMF+58tt9Zxt94hMfw4kTx72KD9G7I+6tOm5CSFy649c7\nO3vHqPPWGE1PT2uJU/+4fdsQfu9736RgiYjNMtedrzQ/f6rvMEGb+eTrQX+99eXMmUfw4IPv9U54\nEXPQ00IywZWeqG9EOVXXx961DbIoc1nlxd64rqNS+XG8/PLncO+991oLByEAzx4iOSDqDrekn1G7\nc+ZtxYrOHU6zKHNZrYzqX4lzGcDDePPNd+C++37E+zLhCrZ33yZt6GkhmeKyV8DlsAWhy5PgSrxN\nDW+4Ej+T7JaF3wDwMIC9ZQJA7tPBFL4OvWWFTk9L5quFBl/g6qFCknQXXFMrBXzcbVbHihVX4h13\nyTBXjexldfWSVCqTAhzsKxPj44flsccedyKfdcPl7G4CLnkmeSKJoTRpXH1tlNKGO+t49xqcOALM\nFaHlIo1GQyqVqYFlw1UBbvOufI+Cy9ndhaKF5IYkhtK0cfW5UUqzL0yW8R40ON1zZqJszOejwLRJ\nN23Hxw93NiR8XgA/y3cYNssBy1x8KFpIbkhiKE0bV98bpaQu8rhnIek8nycovbvCZZgAs1EW8jDs\n1Gw2ZWVlRSYm7ou0u6yO59lMN9uCm7vvxoOiheQGFz0tIruN0tjY0UI1SmfPPtnpjR8UoCZnzz4R\neJ1OV/wwgzPK+JksC3kbdupPq0sC7BfgLu1xyyLdbHU0estjXgStDShaSGxcrmBJei2mezqrq5ek\nWp2SsbF3S7U65b3BikKcs5J0Goi09zNRFnz3toUxuE3+wsI57R6WrNLNRpuQJxFrE4oWEgsfKptL\nq4fyarBGEdXFbsIV79LBmyL64uhiZ8FkmLKcF9VsNqVer0u9Xmeb4BjOiRYADwG4BuBVAB8NuWYO\nwMsA/hOAtSH30p9iBYaVLT4+T8RNQ1aelt77umLgdcTRh86CbrJqb0yndVHbBF04JVrQ3lX3dQAz\nAEoAvgDg0MA1+wD8HoDbO5+/b8j9jCRaUWFli0+RhV5Uj0cRJiKmiaOvZSiNcOz+NsoEap3YSGtf\n89MVXBMtDwD4bM/npwa9LQB+DsDfi3g/7QlWZFjZklEEoxxGVMPlkmfEFEnj6GNnIc08rqAl67bK\nhq20LnKbkBadoiX1Nv5KqYcBvF9EPtT5/EEA94vIEz3XfKLjhfkzAMYBfEpEfi3kfpI2TKSfKIfs\nkb0UYbt3X3E9b3w7ELTVauH22+/CjRu3ArgTwFdRKt3AG298ZWR4s46rzee7Xu5cRec2/rfquEnE\n55wA8F4AYwB+Ryn1OyLyetDFzz777M33c3NzmJubsxDE/MLj3ZMxPT3NtLJMFKPgw7kv3cMZ5+dP\n9XUWXC1PL7/8Mm7c+B6AF9E1/Ddu/DBefvllvO997xv62+7hjDs7RzvfHEWpNIOtrS1rp2ebTuve\n5508eVLbffPK+vo61tfXzdw8rasG7eGh3+z5HDQ89FEAz/R8/kcAHg65nz6fFCHEG6JMpvRtuN
OX\nIbR6vS7A3X1DLMBdUq/XR/7Wdp6ElRNTaV3ECdW6gWNzWt6G3Ym4ZbQn4t47cM0hAP+uc+1tAL4E\n4E+H3M9QshGiB18MkU9ENXw+zhXxgWazKeXyvr70L5f3Rd4N2dZ8D9sCyTeR7Co6RcstGjw13wNw\nFsBvob1C6JKIvKKU+rBS6kOda64BqAO4CuBzAJZEpJH22ST/tFotbG5uotVqOXGvixcvY2bmEE6f\nfgwzM4dw8eLl1OFyDZ1pHpXuEEN7aALoHWLoZXa2PSTUbkqA9jDGNmZnZy2FNJ9MT09jZeUCarVT\nGBs7hlrtFFZWLgQOsQTVgTNnHsH29jVcuXIB29vXtA7X9ZbHqOVEF7afRyKgS/3oeoGeFtJBp1tW\nx72K0OvKyhUeJ225isMcWR6bEETSgzR1UYQ6bwO4NDyk+0XRQkTCG4tGo5Fo51wdDc/GxkbnwLl8\nDk1k3UDHESNFH6LLKv42h+fSHKSZ5Flh6RmlXBa9PI6CooXknqDGsVq9UyqVqdheAF0N7eLikrQP\nE7R3KJtNXJgvwsZ/NKO8YSb32bEpbNMcpBmHqBPAR4kaTtQNh6KF5J69jeNaYsHQaDSkUplK1dDu\nhuc5AQ4IcFSAmiwuLqWJ5h6ybABHGSQfBUXWYe4+P4mHMOx+w/Io6sqaNOUsyfCcqwIp7TOy9k76\nAkULKQS9jWOlMim12pHYXoDuPWq1OwWoSa12OJEY6O/1NQXYkPHxw7G8EK7NFwgizCD52JvMOsy7\nZe9dnbJ3JHU4Rnkfhg2n6JwXEkeEmBJIOgRpWu+iC95JH6BoIYWg2dw9tbXRaMRuaIO8NZXKpDQa\njURhSdPQR2m4XWkAB42BC2IqLlmHeff5ax3PnJ5wDItXUPkZHz+8x8tYqUxam5ulIx+CxIkuQUpP\nix0oWkjuCWqU4rqldYuApKtWojZsQddVq1NSr9czbQRdEVNxyDrMu8/fEEBvOMLKYVD5CRIoQUIm\nrZAK83iYyAfdQmFYekbx5HA122goWkiuGdYoxXEJm+gFJXFJx2m4exvAUmlCyuV9mSw/pqcl2TO7\n6WbK0xL0rF4GDWjYUJCuFThRJgXrzgdTQijNnJ+s5065DkULyTU6GyUXekFxG+7usFgWQiGssXYh\nHeNiM8zDPIPV6qx051NVq1OysHDOmnjqDVtST8Kw50Qpo7rzwbQgzeOE9KyhaCG5pr+XuiHAmjH3\ntS2yHtqKQh4baxthjuIZbDQasrBwLnLvXXe4TaRDnDKq+/kmBemweGU9udtXKFpIInwyOmfPPint\nJc4HBajWGzIVAAAgAElEQVTJ2bNPZB2kSAxL46yHtkaR9TwQX4mSbnHy0xfDmPWwoan2LCxeYYsB\ndC1nzzMULSQ2vjSEItk3hknRnca2h2R0p7tPIjkNUdItqiD0reyfPftEp3Nxj1edi1EE1T2dG14W\nDYoWEgvfGkJfevzBEy/tTLg0ha4ty30SyXEZtgQ3LN2ilg8d+wHZQvcwrmuMnpC+JjZ2yM4DFC1k\nJL0VzhcR0EXHDramGTTKCwvnvErjYQwTJVG3PHdNJOsSf8PiP+oZUTZK2x2C6O68fExM7LysA9/a\nFR3o2PCyiFC0kKFkfTJqGnTtYGuSIKNcrU55k8ZJSeYtyL4xd2Ujsu49Rm2UNj//V73owbsoTm2w\nV2AWK/5JoGghoYQ1JCZORtVNkPs16Q62Jgkzyt3VIS6ncRp8nJehMyy2NkqrVCZlfPy4M6JvGD4u\nhddJ0eMfFYoWEoqtk1FN4FoPPYwoS1xdTeM0JFkBk3VjrrNMmRBjUbfed7kHHzT3w7c6kCbMPsbX\nNhQtJJS4DatLFc6lHvooXDHKtlldvSTV6pSMjR2UanXK6l4jSdBdpmxtlOaDZzQIHydgd8M8MXFE\nKpVJJ+cP+Q5FCxlK1IbVxQbGJzHgglEOwmS4uvkzNnYs0/
yJE0cTQqN31VjatDa1Y61tfOp0dNkN\ns/sTn32GooWMZFSD53ID41tj7RImRYUrZSaJ2DZRpnSK/jyUeV+Gd3vZ2NiQiYkjMng+VKUy5XVe\nuAZFC0lN1F08dfYodeNimLKk2WxKubyvr/Etl/dpSx8XjJIrwilNOHSU27j3sFFXXMmbODSbTalU\nJjselt1yPTFx3Gmx5RsULSQ1oxqY3l5kVqcND4Y3zSmsPpDWsNTrdQHulu6mZO2/d0m9XtcWvqzn\nS7kgnNKEQ0e5jXsPm3XFp+HdLouLS+LDEnOfoWghWhg2lr5rnJoC7M+0Qvu870xUdBiWtmipdPLr\nROdvRZto6Q1nVvOlXOnNJwmHrn1e4gpHW+nVu3+Jbx7QxcUlqVSmZGLi+M3ySk+uPihaiDaCliuu\nrKzIxMR9nUZuo2MAs+nZhu1jsRu+7HrbutBlWBqNxp4eI1DTvs+N7flSYV62rHvzWZzcHfcetjxT\nefB89pazPMTHJShaiBF6l/7tGr9sPS152MdiFLoMy8bGhgxuK16r2T+3RqehDDMervSC44Qjr54W\nF4YNdeKKNy9PULQQ7eytqM8JUJOJieNSKo1Lubwvk56tzX0ssmpMdTWSrjS2eYtPb3hMLW/WcY+w\n8Jn2TMURqT54MFyZN5UnKFqIdsI8GisrK5mvHrKxj0XWjakuw+Lr0EkQprbNT1JmXFveHHdiusn6\nG1VcuiZCw/AlnD5B0UK043pFdaHRNY2uOLrifk8bDt35klR4uFI+wtA17JQmr6KI1FEdI5dwRfzn\nBYoWYoSiVlS6g91FV5lMY9hdLx9pw6fzFOx4E7S7Q9ButjeuiP88QNFCjNFoNGRlZcW5k5VN4npP\nOq9ENQo6jEcaw560fEQx4ro8a0nLr+2y3xVI4+OHhXujFAeKFmKErOd1ZElRvUxZYaKsDRMBaY1z\n3PIxKn6645+0/GbhRdq7rYJ73iuiF4oWoh16G+gOtoWJshZFBOyey3T05go0E1vhj4qfqbqWpPxm\nVe/Z3hQLnaLlFhACYGtrC+XyLICjnW+OolSawdbWVnaBSkGr1cLm5iZarVbk30xPT+PkyZOYnp42\nGDKiu6y1Wi3Mzz+OnZ01XL/+EnZ21jA//3hg3ov8CYA3cePGm3jiiZ/H6dOPYWbmEC5evDzyOVHL\nx6j4maprScrv9PQ0lpfPo1Y7hcnJE6jVTmF5+bzxOpDVc0kO0KV+dL1AT0sm5KnnU+RhLt2Y8D7F\nKWtRnh/18M/dZ5rdMDErT0vaMGe1RxG9m/kHHB4iJsjDvA4bBsH1hlZX+EzuRhulrEUVn1HyvF/Y\nmD+aYlT88lDX8obr9dpnKFqIMXyvuKYnFrruxdG5fDVICHR3Ija9yVpc8TlKBNj0tESJX5T/u4iP\nYY6C6/XadyhaCAnBpKfFRbd+LzrDl/WZT0nE5yiD2itseo+mqFanZGHhnDP56Cp5Neyu1+s8oFO0\ncCIuyRUmJ/i5PFm51WrhhRdewK23zkBH+GZnZ/HWW1sArna+uYobN76GcvlOLfdP9vxtzM7Ohv5m\n1ETUM2cewfb2NVy5cgFvvPEVfOMbr+Hnf/5/hVK34OMf/43IE3KLSJzJzsPuEXdyvA1crtckAF3q\nR9cL9LQQDXR73Y1GQ5s722aPLI4bPvh07vThGxxy6Q4N2d6IrPf5Oocm2MOOjiu77vbiwuZ8JBrg\n8BAhozHRUNqYQBkn3OFbox/XEr5Bw2B7Amn3+Trn0nRxfXt+m0Tfgn+tM5F5LdNdd13ZnI9EwznR\nAuAhANcAvArgo0OuOwngBoCfHHKN/hQjhaC34TU9t8WVwxuzOITO9mRMk5ux5amHnTRfoq4SO3v2\nyY4n76AANTl79olI9x8mDl3aEC+vk4xdwCnRAuAWAK8DmAFQAvAFAIdCrvv3AP4NRUs+cKmSDza8\nCwvnUp01k1W84vb+sz
a8NtLKpEckLz1s3SdYD3q20gwN6l6JRg+Zf7gmWh4A8Nmez08FeVsAPAng\n5wD8MkWLfXQbF5dWEgQ1itXqVKJGNut4JREhWRleW2llWpi5JL6TkCZ9oq4Sq1QmU50VpHN+VNL4\n+p7PPuOaaHkYwFLP5w8C+NTANT8AYK3z/lcoWuyi27hk3bsfJKzntbBwLpYxdyVeSURIXoZswvDd\nI2Ly9HTdJ1gHCRQdy917y6iuib26DrAkZvFRtPxTAPfLrmh5eMj95Jlnnrn5Wltb05+CBcKEcTHh\nnk1jdIfFMc59XXI7u94rzOp0YJfTJIwkc0HixDVtHY/qBel+r0M46miXoqaRK52RIrG2ttZnx10T\nLQ8A+M2ez3uGhwB8pfP6KoA/AvCHAH4s5H5GErGo6DYuzWZT6vW61kZARy9IR0/cl8bNBeO9N63W\npFKZNOJJ8JlGoyGDy9CB2tB0SlIf0pb/qKvEdJY9W94zlzojRcU10fK2nom45c5E3HuHXM/hIYvo\nNMS9jWnvjqJpGhyd4dPRoLo+DDHKoNkUNN2wVKt3ClCTWu2Ik2mWJSsrKx0Pi/S87pGVlZXA69PU\nB915b6Ms2XqGD52RPOOUaGmHBw8B+DKA1wA81fnuwwA+FHAtJ+JaxpQXolqdknq97txQU1pc8GQE\nMarxzWLcvtFoWNvaPyou5V9cT4uL9SEPmOiMuFTOXMc50aLzRdGil27FSrszrKnGdNcQr0ncTauK\nxqj9LmzNEYgapixwccLl2bNPdITLPSPntNArYI44c2BGXediOXMZihYSCZ0Vy2RjanqiYl4YlgdZ\nbbPukpF1KSyDxFk95PoQpU5cq8dR6oHL5cxVKFrISExULFMu1qT7khSxlzNsgqTtfS9Ghck2rnl9\n0uCaMY+Lj96KqPUgT+XMFhQtZCQmh3N0HkTo2w6wcTBleMLum1Q86CgrLhjZpGXDhbDnCV+9FVHr\ngYthdx2KFjISkxVrdfWSVKtTMjb2bqlWp6wOO/nSy8mqF5nEAOepEda96Zhrgsa18Azis7ciTj1w\nxbvoCxQtJBKmhnNKpQkB9gtwQoD9UiqNWxt28sHA+hDGQfLUCOvadMy14QvXwhOE796KuG2RywLS\nJShaSGR0V6x6vS7AbX2NDXCb1Ot1a+F03cC62IuMQtEaYdOrsYJImsauGvlB8uCtKFo9sAFFC8mM\ntmi5u6+hB+5KLVri4nLD4ouB8QWded17L5OrsYJI4ynxSQgvLi5JpTIlExPH6a0gIkLRQjKk2WxK\nubyvr6Evl/ex0RnA1V6kb+gcEgm6l4nVWEGkvZ8vQribnhMTR6RSmZTFxaWsg0QcgKKFZEq3YRob\nO0qDPIQ0vUj2QPUf8RB2L92rsYLQ4SlxXQj7IqyIfShaSObQqJrDhwmXNtA5JJL0XrrKuS6D7nK9\n82kIi9hFp2hR7fu5g1JKXAsTIbZotVqYmTmEnZ01AEcBXEWtdgrb29cwPT2ddfCsojMtXEjXv/7X\nn8SnP/0ZAO8E8A2cPfuz+KVf+qSVZ9vAhTQmbqKUgogoHfe6RcdNCCF62NraQrk8i3ajDwBHUSrN\nYGtrK/I9Wq0WNjc30Wq1DITQHtPT01hePo9a7RQmJ0+gVjuF5eXziQygznslodVqYXn5nwB4AcDz\nAF7A8vI/MZ5HNsuCrjTOS/klhtDlstH1AoeHSE7IYqO3rIeWTAxfmFo9ZJOgoZPx8cOysrKSKizD\n4qOrLMRNszRpnHX5JWYA57QQ4jZpGt+kEy6zmAjZa6BocMLZmzfPCVCTiYnkk2qHpbeusmAzT3kM\nQ36haCFGYSOQDh0GI0kebGxsSK12pK83X6sdNjYRstegVatTe5bCc+VIP930Gh8/LO1TzaOvZBpk\nVBnTdZ6UTRGcJMwUyn5A0UKMwUYgPVmtomg0GnuMIVCTRqOR+J5hRnSvQXteBjcd1HlA
Z17ET7PZ\nlJWVFZmYuG9PWi0snItc90aVMR2Cw3Y5jhtmLrH2B4oWYoS9jcCaVCqTqYxeEcmqMW17Wu4U4IAA\n9wlwQKrV2cRGZpiA3WvQmjJ4vEPaOOdVQAeVj2p1KtY+MlHKWNp9XbIox3HCzCXW/kDRQozQ3whc\n6hi/g1KpDD/JOW+9YR1ksRHYrpFZE2BDgDUjm7GF/b9UGtcW57z3ogfLx8LCuUADPMz7EqWMpa2b\nWZVjHUNkxB0oWogR+o1eNI9LXnvDSRk828a2mNNlZKL0YoOepSvORehFjzoHaZj3JegeNsLpGoNl\ncHFxydmwFhmKFmKM1dVLUqlMCnBwpMeFPZ1+bAm4UUZEh5GJmremDFoRy1ZU70uehJsOumVwcXGJ\nHShHoWghRmk0GlKpTAV4XPoNR71el7GxY32N6uDeEy730kYRJ+y2jKxNz1bWZ91k/fwsGOV9ybtw\nSwrTym0oWohRms2mLCycG/C49Pf0VlcvSbU6NTD5sn/vibNnn/S25xNXHNgYzshionTWojPr52eN\naeHma/oOhrsIw4k+Q9FCjNFrrCuVSSmVJvf0XhqNRo/xvCTAfgFmpX+57ZqE7UXhOkl6bTZ6ekkn\nShO/MSUsfJ2PFhRuelrchqKFGCHqipCg5a7V6h1y221He77bCPXSuE7SXpuNXnHwRGk20CQevhr5\nYeEu4nCiL+gULbeaPdmI+ET3sL6dne8HsAlgFrXaPfhn/+xj2L9/P2ZnZzE9PY1Wq4W33toCcBXt\ng/3+AMB/gch3er77YwBf7/l8FTdubGN2djaDmMVjdnZ2IH7Rwn7mzCN48MH3Ymtr62Za6aR7IN2j\nj/443nzzHZ2wtQC8ibe97QewtbXF03RJJHbr+t6DOV0uQ8PCbbr+EUfQpX50vUBPS2Y0m00plSY6\nwz0nBNgvpdJ4YO8rqFcz+N3Zs0942/Nxude2O1H6uY7H5ZgANVlcXBIRf+cpEHvk0dOSRVhYz6IB\nDg8RXQyuVhg8P6Zc3hdrae3gdz5XbJfDvri4FDhnyIVlny6nG9nFZWE+DBfC7et8oKygaCFaGKx4\nRdkXIg9GdWNjY8/5NePjhzsemOx6oWzM/cLXupBluF3y9vgCRQtJTdIdOH0nL0Y1KP8qlcnAg/iS\niM4kRoGNOSkCXF4dH52i5ZZsZtKQrOlOaGtP5gSAoyiX78TTT38EtdopTE6eQK12CsvL5/smtLVa\nLWxubqLVamUR7FS0Wi3Mzz+OnZ01XL/+EnZ21jA//7iXcelOyu3Nq09+8uP47ne30Z5ADCSd/Hzx\n4mXMzBzC6dOPYWbmEC5evBzpd0FlqjtJkpC80D9RH/BpkUEu0KV+dL1AT4tWwnrMw3rFYb/p9VJU\nq1OysHDOq150HntIg3mV5cm+9LSQouDCvBqfAIeHSBRGDYXEqXj9Bqm7odzdXlXYsCGxer2e+pye\nrOcF6DqoMa2wM3mIYl7Ie3rkPX5dihJPHVC0kJHoPvBu15g1vd7YrNeolkoTUi7vSzW/JUwY2mzQ\ndM7T0eEt6Y17XuYQ6cJEerhkPPOU3y6lq+9QtJCR6B4K2TVmz3f2cNFz3yxoNptSr9e1GOege9hc\ndmxiSEaX6ztu2PJuJEzmlYmyFjc/8jQ8mCfx5QIULWQkphrIvYck+tkw9XuONgRoxhZfQcLQ9rJj\nU/N0dAiIOGErgpEw15GIV9ai5G2S/MjLnLE8iS9XoGghkTAxWazZbJ8AHXRfkz1l3feOs/vvsHuY\nXHacNAyuNLBxhih3z1TaEGBt5KRwH9GdV0lEQhQxkkYMuVoW45AX8eUSFC0kMqYa/bBVKyZ6yqbm\nAcTZ/XdU2LoCrjs01L5vU4DnpVqdMtpw21rJkKQsRQnbxsaG1Grv6syVOiHAAalWZ2+K4zx5X3Tm\nVZLhtyjXpzHaeVhVkxfx5RIULcQpTFZyU/fW2ZsK
EnBtL85tAtwt5fI+4423aY9EGuE4KmyNRkMG\njyQAqlKt7s+l4dCZV3FEQtQyv7fOrUmlMimNRiNSmPLgHcuD+HIJ50QLgIcAXAPwKoCPBvz/pwF8\nsfN6EcCRIfcykWbEICbdqSbnbLgmtFxt7E33PNueliN9eVypvEvGxo7RRR+BqOUmTj52jXa1eqcA\nNanVjhTOeLtaH33EKdEC4BYArwOYAVAC8AUAhwaueQDAPtkVOJ8bcj8jiUbM4aIAiEJYbyptY2Vq\nrkFWmB7jD8rjIhwpkQVxPAi7p4kzD0g6XBMtDwD4bM/np4K8LT3/nwLw9SH/155gxDw63KlhYsGk\nq9bE3BxTcw2ywkb4gvKYLnozxN+bid4ukg7XRMvDAJZ6Pn8QwKeGXP+3eq8P+L/2BCN2SOOhGCUW\nbLhqdRpnE3MNssSGgAjKY7ros8N1MU38QadoUe37JUcp9TCA94vIhzqfPwjgfhF5IuDaUwA+DeA9\nIvLtkPtJ2jARv2i1WpiZOYSdnTW0D9u7ilrtFLa3r/Ud1miazc1NnD79GK5ff+nmd5OTJ3DlygWc\nPHky9v1arRa2trYwOzs7NB5Zxz9OOKNcR9wkSf5dvHgZ8/OPo1SawY0b21hePo8zZx4xHFKSN5RS\nEBGl4163arjHGwDu6Pn8zs53fSiljgJYAvBQmGDp8uyzz958Pzc3h7m5OQ3BJK7SPR14Z2fv6cA2\njWP/6a1t8ZDm9Nbp6elI4e+e2Dw/f6rPONiIe9colcvtuA8zSlHjEwUKoPikSbM4+dzLmTOP4MEH\n3+ttXrGcZcP6+jrW19fN3DytqwbA27A7EbeM9kTceweuuQPAawAeiHA/nV4p4gEuuaFtzp+J+/+4\n10UJTxbp7vKkY1dJu+TclfplE5Yzd4BLc1ra4cFDAL7cESZPdb77MIAPdd5/BsB/BvB5AC8D2Bhy\nLzOpRpzGpUmXJuZR6GpAdTbEJubSRBFmRTSgaUibZj7MmdJNf5rZ2eSRhOOcaNH5omgpLnmddKnL\nUOs2+LrvF0VQ+WZAXSiTadOsiEJxN80uye5Oy7fJwsK5rINWSChaCPEIXYbahMG3faKzTgFnWky4\nMrygI81c8mTaoNlsdg53zeeuyr5B0UJyhws9WlO46mnpvW8WJzonNaA2xIRr3omgM67i5lme61gQ\nCwvnBLjbG69enqFoIbnClR5tUqIYg9XVS1KtTsnY2EGpVqdSz2nJosc8LJ5JNtRLYkBNiImgsLg4\njNUNZ/dQzqT1pSjixTXhWWQoWkhuSNOwuND4RhVc3evGxo6lFhtZxDtKPG0IKt1iIixerhq8tOGy\n5aXKul52KdqwmKtQtBCvGNaIJTVCLnhnbM/jyIo44TdtsHSm5ah7uWjwktaXZrMp9Xrd2nEMLnlN\nXRJRRYWihXhDlO354zakroiAqAbExaGGOLgWfl1iIkq8bB0fEfUZScr+rpfv3UbneLhSL7thoVBx\nB4oW4gVRG7G4RsgVI1pcT8uaVCqT0mg0Yt1DpxHRcT8X8iWJZyJOfdm7X4m51TSu1EsXvT1Fh6KF\neEGcRsx0b9MUUQ2Ii0MNceiGv1q9U4Ca1GpHIsdjcXFJKpUpmZhwL+5ZT2w2PZ9rbx28JMBtMjZ2\nVHt8XaiXjUZDKpUpJ9oGsgtFC/ECk42YSyKg0WjIysrKSM+D7y7rJAZhcXFJgJrTRiSrfLHhmQiq\ng9XqlNTrdSPxzbJerq5ekkplUoCDmXt7SD8ULSQVNhvpLM/ysUGRXNFxjWyz2ewYkWN9v5mYOE4j\nIvY8E7aFRBb1cjct16S9A66fqxHzCkULSUwWRjZKY+Bjg+GCOzwpSdI7bnw3NjZkYuLIHiNSqfAM\nmC62BIVP9StJWPsFdXfr/nukUom2J1KcdtGntHQFihaSCFeNrK/eClcmHsYlTXonmwT6XMeIHBWg\nJouLSzqikRto
BHdJWjaTThaPuxTc17YqayhaSCJcNLKuCqko+Bj2OCuehu1+G9XIdhv58fHDUqlM\n5lawUHikR9fGeVG9VnGXgvtY312BooUkwsVK56KQ6hLFELk0ITgKUdJbd28y7wadvW896GgLopa1\nJEvBXW6rXIeihURmsBK7ZmRdFFIi+R3jHpXeruaHqzC99GEzLZMsBWdeJ4eihURi2LkqSY1s7291\nGWsKKbsMS++NjQ2p1Y709SZrtcPsTYbA3rdebE5MTrIU3LW2yhcoWshITBjeXhFUKk1Iubwvl0MI\ntg1RVktEg57ZaDRkcF8VoBZr99sikXeBmwW65lONIqkAcamt8gWKFjIS3YY3yRiwr9g0RK7Nh2h7\nWu6U9mqf+wQ4INXqLD0HQ2Dv2w5pVhbZEEEkHIoWMhLdhrdfBG0IkG+XuA1D5GIvvX+Trg0B1jIP\nkw+YNH66hnN9Jmldca1TUFQoWkgkdBpenz0tSRtu0w2+a/MhuvFdXFyKVG7yYhBdRseeOnkw2Enq\nioudgqJC0UIiY2oMuFQal3J5n/MucZcbbpe8GoPptLi4NLTcuJyueSGN0c2bwU4SH9c6BUWGooVk\nhonVQ0mfH+Va1xvus2ef7Ex8PShATc6efcJ6GOKmk650padmOGmMbh4NdlzPcbPZlHJ5X185LZf3\npS5vLLfxoWgh3pOk4sft3bvecLsiquKmk450padmNPS07CVup6VUGu8MZd8nwH4plca1raBkuY0O\nRQvxmiT7xyQZSnG94Y5i/G306mx7WlwaFnOdNPPSir6qabd+NTvlrKlxBaV77YnLULQQ7dhyeYZV\n/O7kz7AeTHsp7rukvRT3mERdijuq4c7S1TuqEbTZq0t6bksSg9iflyci52VR4eqhZJhdQZnMw1hU\nKFqIVmwax6CK3z5Mb2po45Jm07OwhtsFV2+v8a9Wp2Rh4dzN+UK2e3VxDVxSg5jFBnZFMN5FiGNc\nzK2gpKclDhQtRBt7K2K0I931Pe+LUqlMysTEfUN7MPV6XQZPYgXuknq9ri0cWTVAzWZTFhbO9Qmo\nhYVzue3V2T4qYJQ4zYOxd0GAu4qpFZRM5+hQtBBt9Hs+LnVc9u+SUmlcFheXjDxzsOJ3h4aGCYi2\naLltoHd+W2LR4pKrN0hAVatTzogq3dgUjC4NwZnCJQFeBAZFUB5Er2koWog2+idFHhDguZ55IzVj\nwmWwop89+0RnyOAeCVr6q3slgEsNfZiA6npf8tirs9VjHSZOXSoDaXBJgBeNPIheG1C0EK2srl6S\nSmVSgO7kyN6hmymLk3PXZNhqktXVS1KtTsnY2EGpVqdSNxCuuHqHGc889+KyXhmVF2OfF/HlG0z3\n6FC0kFQEGYtGo9HxZBzra8QnJo4bb8TjGA/dhs4VUeCKgHKZpHkVlrZ5MjosP/bJi+i1AUULScww\nd+bi4pIMruqw0YjnyXikwRUB5SJp3fCjVpDZMvYm85jlxy5st6JD0UISEaWSLS4uSaUyJRMTx632\n2EbNadENG3h/MG0cbJUFzn/IH/RwRYOihSQiqjvTtkG3vUOqT8aD4sodN3zaTd7YK88nrKOj0Sla\nbgEpDLOzs3jrrS0AVzvfXMWNG9uYnZ3tu256ehonT57E9PS0lXBtbW2hXJ4FMAfgJIA5lEoz2Nra\n0v6sVquF+fnHsbOzhuvXX8LOzhrm5x9Hq9XS/qy0XLx4GTMzh3D69GOYmTmEixcvZx2kTIhabk2S\nNi92y/jRzjdHjZVxYhfb7WXRoWgpENPT01hePo9a7RQmJ0+gVjuF5eXzmVc2m0bJF+Phk7hKQqvV\nwubmJlqtVt/7IJKW21H3jRPWtHnhgvAiZtBVzkhEdLlsdL3A4SHjRHVn2nR72hob9sVN78qQiAl6\nh+dKpQkpl/dFGqqLUx7jDAGOuq+uvHB5/kORhjhM7JDrw1BzloBzWohpeitj75
k4JrE9IdJF49HF\nF3EVl/54NTubBeqNY9gOw/V6PdH5UzrzIkkZN10vimR4dca10WiMPDONtKFoIUbpb6QvdQzL3blp\n0JrNptTr9UAj5hI+iKu49HstNqR9yrNeb9Jez8glAW6TsbFjifdpySovTAuKvIrjIHTGdXdDzoO5\n9IbqxjnRAuAhANcAvArgoyHXfArAawC+AOD4kHsZSDISh91GvymDO+T63qD51qvMm9tel6dlWLpE\nfUbcYZ/sVtWZq395HoYcRFdc9x59kp/20RROiRa0J/O+DmAGQKkjSg4NXPOjAP5t5/3/BOBzQ+5n\nJNFIdHYr5fNGesJZUaRepW2SzDeZnLxPSqVxKZf3xfJgRBGe3WvGxg7K4Ongvpw9ZENQuJ4GOtEV\n1+BDZu+RSiX90SJ5xTXR8gCAz/Z8fmrQ2wJgEcAjPZ9fAfD2kPsZSDISl+45P4MnK/vcoGXdq8yb\n16RLEu9Vb1rESZc4hqc7DDjsepeH4GwJCpfTQDc64ro3X/6FlEpj8uKLLxoIcT5wTbQ8DGCp5/MH\nAVSp2y0AAAhhSURBVHxq4Jp/DeBHej5fAXAi5H4Gkowkodls5uqk4Sx7lb4NS0XFdpomEZ6jDJXL\nYtLmqjpX00A3OuLazZdq9U4BalKrHclVvdYNRQuxSp4atCx6lXl2wdv2XiVNS5/LsM9hzzNcPRQd\nnaLl1pjbugTxBoA7ej6/s/Pd4DU/OOKamzz77LM338/NzWFubi5tGEkKpqenM9+AThdnzjyCBx98\nL7a2tjA7O2slXt0N7XZ29m5o53u69m+adhSmN03rbjQ3P38KpdIMbtzYjrTRnM9l2Oew55nvfOc7\nqFbfhTffzF+9Tsv6+jrW19eN3Fu1RVCKGyj1NgBfBvDnAfwBgA0AZ0TklZ5rPgDgr4nIX1BKPQDg\nH4rIAyH3k7RhIsQlWq0WZmYOYWdnDV3DXqudwvb2tVw0bhcvXsb8/ON9IuLMmUeMPrPValkVnoQM\nkvd6rROlFEREabmXDoGglHoIwCfRXkm0LCIfU0p9GG2X0FLnmk+jvTT6jwE8KiKfD7kXRQvJHVkY\ndptQRJAikvd6rQvnRItOKFpIXqFhJyR/sF6PhqKFEEIIIV6gU7TwlGdCCCGEeAFFCyGEEEK8gKKF\nEEIIIV5A0UIIIYQQL6BoIYQQQogXULQQQgghxAsoWgghhBDiBRQthBBCCPECihZCCCGEeAFFCyGE\nEEK8gKKFEEIIIV5A0UIIIYQQL6BoIYQQQogXULQQQgghxAsoWgghhBDiBRQthBBCCPECihZCCCGE\neAFFCyGEEEK8gKKFEEIIIV5A0UIIIYQQL6BoIYQQQogXULQQQgghxAsoWgghhBDiBRQthBBCCPEC\nihZCCCGEeAFFCyGEEEK8gKKFEEIIIV5A0UIIIYQQL6BoIYQQQogXULQQQgghxAsoWgghhBDiBRQt\nhBBCCPECihZCCCGEeAFFCyGEEEK8gKKFEEIIIV5A0UIIIYQQL6BoIYQQQogXULQQQgghxAtSiRal\n1H6l1G8ppb6slKorpfYFXPNOpdRvK6V+Tyn1JaXUE2meSQghhJBiktbT8hSAKyLybgC/DeAXAq75\nLoC/KSJ/BsAPA/hrSqlDKZ9LIrC+vp51EHIF01MvTE99MC31wvR0l7Si5ccB/OPO+38M4CcGLxCR\nPxSRL3TefwfAKwBuT/lcEgFWPL0wPfXC9NQH01IvTE93SSta/pSIfAtoixMAf2rYxUqpWQDHAfxu\nyucSQgghpGDcOuoCpdS/A/D23q8ACIC/E3C5DLnPOIBfB/Bkx+NCCCGEEBIZJRKqM0b/WKlXAMyJ\nyLeUUu8AsCYi9wZcdyuAfwPgsyLyyRH3TB4gQgghhDiHiCgd9xnpaRnBvwLwMwCeA/BXAPzLkOt+\nGUBjlGAB9EWMEEIIIfkiraflAIB/CuAHAW
wD+CkR+f+VUt8P4DMi8heVUn8WwP8L4EtoDx8JgKdF\n5DdTh54QQgghhSGVaCGEEEIIsYXxHXGVUstKqW8ppa72fHdJKfX5zuurSqnPd76fUUr9157/ne/5\nzQml1FWl1KtKqX9oOtyuEpKex5RSv6OUelkptaGU+h97/vcLSqnXlFKvKKXe1/N94dMzTlqybI4m\nJD2PKqX+P6XUF5VS/7IzIb/7P5bNIcRJT5bP4YRtcjpsg1SWz3DipqfW8ikiRl8A3oP2MuerIf//\nOIC/03k/M+S63wVwsvP+BQDvNx12F19B6QmgDuB9nfc/ivaEaAD40wBeRnvu0iyA17HrXSt8esZM\nS5bNZOm5AeA9nfc/A+Dvdd6zbOpNT5bP4Wn5DgDHO+/HAXwZwCG052P+7c73HwXwsc57lk+96amt\nfBr3tIjIiwC+PeSSnwJwsefznom4nZVJEyKy2fnqVxGwkV0RCEnPPwHQ7SFMAXij8/7HAFwSke+K\nyBaA1wDcz/RsEzMtAZbNoYSk5z2d7wHgCoCHO+9ZNkcQMz0Bls9QJHiT03cifINUls8hJEhPQFP5\nzPTARKXU/wzgD0Xk93u+nu24j9aUUu/pfHc7gG/0XPMNcFfdXv4GgI8rpb4G4O9j9ziF2wF8vee6\nNzrfMT3DCUtLgGUzCb+nlPqxzvufQrthA1g2kxKWngDLZyTU7iannwPwdgneIJXlMyIR0xPQVD6z\nPuX5DPq9LN8EcIeInADwEQCrvWPgJJSfQ3vTvjvQNrq/nHF4fCYsLf8ALJtJ+N/RPm9sE8AYgLcy\nDo/vhKUny2cE1N5NTgdXonBlSgxipKe28pmZaFFKvQ3ATwK43P1ORG6IyLc77z8P4PcBHERb5f5g\nz8/fiX63fdH5KyLy/wCAiPw6gJOd78PSjekZzmBa3t95/xbLZnxE5FUReb+InARwCe10A1g2ExGW\nniyfo1HtTU5/HcCviUh3T7FvKaXe3vn/OwA0O9+zfI4gTnrqLJ+2RIvC3vGs0wBeEZFv3rxIqe9T\nSt3Sef8uAHcD+ErHzXRdKXW/UkoB+MsI38iuCAym5xtKqT8HAEqpP4/2+CvQ3vzvf1NKlZVSd6Kd\nnhtMzz5GpeWrnfcsm9HoS0+l1HTn7y1oH/2x2PkXy2Y0IqUny2ckgjY57W6QCvRvkMryOZrI6am1\nfFqYZbyK9rDPmwC+BuDRzve/AuBDA9f+JID/BODzAP4jgA/0/O+H0N6g7jUAnzQdbldfQekJ4Ec6\n6fUygN8BcF/P9b+A9sz3V9BZFcP0jJ+WLJuJ0/MJtFcWXAPwfw5cz7KpKT1ZPkem5Z8F8D0AX+jU\n7c8DeAjAAbQnNH8ZwG8BmOr5DcunpvTUWT65uRwhhBBCvCDribiEEEIIIZGgaCGEEEKIF1C0EEII\nIcQLKFoIIYQQ4gUULYQQQgjxAooWQgghhHgBRQshhBBCvICihRBCCCFe8N8BcDq7hvBz0YcAAAAA\nSUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x11752b860>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(thechosen.inferreddate, [random.uniform(0,1) for x in range(482)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x122fdf4e0>"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAFwCAYAAACWzVC/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X90XPV95//XWz9GGkuWbIMQBBwLcByUDU6ARWGT9Ftx\nUrtLtych7SbdpWmT0qVJuy50v5x+A3S/xdmc5CRNe7Ih/fL1IesWf7e1cb7dQxI43eJwsLqr7DbS\nAgE2EoRfcoEESyEJXVFju/i9f8y9mjt37oxmNHc0V5rn4xwdxnfuj8/93M9Ib+a+5jPm7gIAAMii\njlY3AAAAoBIKFQAAkFkUKgAAILMoVAAAQGZRqAAAgMyiUAEAAJmVSqFiZoNm9v+b2ayZfdfM3mVm\nm83siJk9ZWYPmNlgGscCAADtI613VL4k6S/dfVTSOyQ9KekWSQ+6+1slPSTp1pSOBQAA2oQ1OuGb\nmQ1IetTdL44tf1LST7v7cTM7V9KEu1/S0MEAAEBbSeMdlQsl/dDM/tTMHjGzu8xsg6Rhdz8uSe7+\nsqRzUjgWAABoI2kUKl2SLpf0/7j75ZJeU+G2T/ytGubqBwAAdelKYR8vSnrB3f9H8O//pEKhctzM\nhiO3fuaTNjYzChgAANYZd7c09tPwOyrB7Z0XzGxHsOh9kr4r6RuSPhYs+6ikr1fZBz8p/dx+++0t\nb8N6+qE/6cus/tCf9GeWf9KUxjsqknSjpD83s25Jz0n6NUmdkr5qZtdLOibpwykdCwAAtIlUChV3\nf0zSlQlP/Uwa+wcAAO2JmWnXmfHx8VY3YV2hP9NDX6aL/kwX/ZldDc+j0nADzLzVbQAAAOkxM3lW\nwrQAAADNQqECAAAyi0IFAABkFoUKAADILAoVAACQWRQqAAAgsyhUAABAZlGoAACAzKJQAQAAmUWh\nAgAAMotCBQAAZBaFCgAAyCwKFQAAkFkUKgAAILMoVAAAQGZRqAAAgMyiUAEAAJlFoQIAADKLQgUA\nAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolABAACZRaECAAAyi0IFAABkFoUKAADI\nLAoVAACQWRQqAAAgsyhUAABAZlGoAACAzKJQAQAAmUWhAgAAMotCBQAAZBaFCgAAyCwKFQAAkFld\naezEzOYkvSrpjKTT7j5mZpslHZa0TdKcpA+7+6tpHA8AALSHtN5ROSNp3N0vc/exYNktkh5097dK\nekjSrSkdCwAAtIm0ChVL2NcHJB0IHh+QdG1KxwIAAG0irULFJX3TzKbN7F8Fy4bd/bgkufvLks5J\n6VgAAKBNpJJRkfQed/+BmQ1JOmJmT6lQvETF/w0AAFBVKoWKu/8g+O+CmX1N0pik42Y27O7Hzexc\nSfOVtt+7d+/S4/HxcY2Pj6fRLAAAsAomJiY0MTHRlH2be2NvdJjZBkkd7r5oZn2Sjkj6lKT3SfqR\nu3/ezD4pabO735KwvTfaBgAAkB1mJne3VPaVQqFyoaR7Vbi10yXpz939c2a2RdJXJW2VdEyFjyf/\nJGF7ChWgQbOzs5qamtLY2JhGR0db3RwAbS5ThUrDDaBQARry27/9O/rjP75Lhf8neEF79tygL3/5\nS61uFoA2RqECQFLhnZS3ve0KSX8jaaekxyVdpZmZh3lnBUDLpFmoMIU+sIZNTU2p8E7KzmDJTkkX\nBMsBYO2jUAHWsLGxMUkvqPBOioL/vhgsB4C1j0IFWMNGR0e1Z88Nkq6StEPSVdqz5wZu+wBYN8io\nAOsAn/oBkCWEaQEAQGYRpgUAAG2BQgUAAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDM\nolABAACZRaECAAAyi0IFAABkFoUKAADILAoVAACQWRQqAAAgsyhUAABAZlGoAACAzKJQAQAAmUWh\nAgAAMotCBQAAZBaFCgAAyCwKFQAAkF
kUKgAAILMoVAAAQGZRqAAAgMyiUAEAAJlFoQIAADKLQgUA\nAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolABAACZRaECAAAyK7VCxcw6zOwRM/tG\n8O/NZnbEzJ4yswfMbDCtYwEAgPaQ5jsqN0maifz7FkkPuvtbJT0k6dYUjwUAANpAKoWKmV0g6eck\n/YfI4g9IOhA8PiDp2jSOBQAA2kda76h8UdLvSvLIsmF3Py5J7v6ypHNSOhYAAGgTDRcqZvbPJB13\n9+9IsiqrepXnAAAAynSlsI/3SHq/mf2cpLykjWb2HyW9bGbD7n7czM6VNF9pB3v37l16PD4+rvHx\n8RSaBQAAVsPExIQmJiaasm9zT++NDjP7aUk3u/v7zewPJL3i7p83s09K2uzutyRs42m2AQAAtJaZ\nyd2r3WWpWTPnUfmcpF1m9pSk9wX/BgAAqFmq76isqAG8owIAwLqyVt5RAQAAaAiFCgAAyCwKFQAA\nkFkUKgAAILMoVAAAQGZRqAAAgMyiUAEAAJlFoQIAKVpYWND09LQWFhZa3RRgXaBQAYCUHDp0WNu2\nXaJduz6hbdsu0aFDh1vdJGDNY2ZaAEjBwsKCtm27RCdOHJW0U9Ljyuev1rFjT2poaKjVzQNWFTPT\nAkDGzM3NKZcbUaFIkaSd6u7eprm5udY1ClgHKFQAIAUjIyM6dWpO0uPBksd1+vQxjYyMtK5RwDpA\noQIAKRgaGtL+/Xcqn79aAwOXK5+/Wvv338ltH6BBZFQAIEULCwuam5vTyMgIRQraVpoZFQoVAACQ\nKsK0AACgLVCoAACAzKJQAQAAmUWhAgAAMotCBQAAZBaFCgAAyCwKFQAAkFkUKgAAILMoVAAAQGZR\nqAAAgMyiUAEAAJlFoQIAADKLQgUAAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolAB\nAACZRaECAAAyi0IFAABkFoUKAADILAoVAACQWRQqAAAgsxouVMysx8y+bWaPmtkTZnZ7sHyzmR0x\ns6fM7AEzG2y8uQAAoJ2Yuze+E7MN7v73ZtYp6VuSbpT0i5Jecfc/MLNPStrs7rckbOtptAEAAGSD\nmcndLY19pXLrx93/PnjYI6lLkkv6gKQDwfIDkq5N41gAAKB9pFKomFmHmT0q6WVJ33T3aUnD7n5c\nktz9ZUnnpHEsAADQPrrS2Im7n5F0mZkNSLrXzP6RCu+qlKxWafu9e/cuPR4fH9f4+HgazQIAAKtg\nYmJCExMTTdl3KhmVkh2a/d+S/l7Sv5I07u7HzexcSUfdfTRhfTIqAACsI5nKqJjZ2eEneswsL2mX\npFlJ35D0sWC1j0r6eqPHAgAA7aXhd1TM7FIVwrIdwc9hd/+MmW2R9FVJWyUdk/Rhd/9Jwva8owIA\nwDqS5jsqqd/6qbsBFCoAAKwrmbr1AwAA0CwUKgAAILMoVAAAQGZRqAAAgMyiUAEAAJlFoQIAADKL\nQgUAAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolDBiiwsLGh6eloLCwutbgrE9QCw\nflGooG6HDh3Wtm2XaNeuT2jbtkt06NDhVjeprXE9AKxn5u6tbYCZt7oNqN3CwoK2bbtEJ04clbRT\n0uPK56/WsWNPamhoqNXNaztcDwBZZGZyd0tjX7yjgrrMzc0plxtR4Y+iJO1Ud/c2zc3Nta5RbYzr\nAWC9o1BBXUZGRnTq1Jykx4Mlj+v06WMaGRlpXaPaGNcDwHpHoYK6DA0Naf/+O5XPX62BgcuVz1+t\n/fvv5DZDi3A9AKx3ZFSwIgsLC5qbm9PIyAh/FDOA6wEgS9LMqFCoAACAVBGmBQAAbYFCBUAJJo8D\nkCUUKgCWMHkcgKwhowJAEpPHAUgPGRUAqWPyOABZRKECQBKTxwHIJgoVAJKYPA5ANpFRAVCCyeMA\nNI
oJ3wAAQGYRpgUAAG2BQgUAAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolABAACZ\nRaECAAAyi0IFAABkVsOFipldYGYPmdl3zewJM7sxWL7ZzI6Y2VNm9oCZDTbeXAAA0E7SeEflHyT9\nn+7+jyT9E0n/2swukXSLpAfd/a2SHpJ0awrHAlbVwsKCpqentbCw0OqmZA59A2A1NFyouPvL7v6d\n4PGipFlJF0j6gKQDwWoHJF3b6LGA1XTo0GFt23aJdu36hLZtu0SHDh1udZMyg74BsFpS/fZkMxuR\nNCHp7ZJecPfNked+5O5bErbh25OROQsLC9q27RKdOHFU0k5Jjyufv1rHjj2poaGhVjevpegbAMtJ\n89uTu9LYiSSZWb+kv5B0k7svmlm8+qhYjezdu3fp8fj4uMbHx9NqFrAic3NzyuVGdOLEzmDJTnV3\nb9Pc3Fzb/zGmbwDETUxMaGJioin7TuUdFTPrknS/pP/s7l8Kls1KGnf342Z2rqSj7j6asC3vqCBz\neNegMvoGwHLSfEclrY8n/4mkmbBICXxD0seCxx+V9PWUjgU03dDQkPbvv1P5/NUaGLhc+fzV2r//\nTv4Qi74BsLoafkfFzN4j6b9IekKF2zsu6TZJU5K+KmmrpGOSPuzuP0nYnndUkFkLCwuam5vTyMgI\nf4hj6BsAlaT5jkqqYdoVNYBCBQCAdSWLt34AAABSR6ECAAAyi0IFQE2YiRZAK1CoAFgWM9ECaBXC\ntACqYt4UAPUiTAtg1YQz0RaKFCk6Ey0ANBuFCoCqRkZGdOrUnKTHgyWP6/TpYxoZGWldowC0DQoV\nAFUxEy2AViKjAqAmzEQLoFbMTAsAADKLMC0AAGgLFCoosZqTerXzBGLtfO4AUA8KFSxZzUm92nkC\nsXY+dwCoFxkVSFrdSb3aeQKxdj53AO2DjApSt5qTerXzBGLtfO4AsBIUKpC0upN6tfMEYu187gCw\nEhQqkLT6k3rddtvNbTmBGJOntafZ2VkdOHBAs7OzrW4KsOaQUUGJZk/qdejQYf36r/+WcrkRnTz5\nnH7v935XH//4DW33h5rJ09rHb//27+iP//guSVslvaA9e27Ql7/8pVY3C2gqJnzDmkSQFO1mdnZW\nb3vbFZL+RuGYl67SzMzDGh0dbW3jgCYiTIs1iSAp2s3U1JQK76QUx7x0QbAcQC0oVLBqCJKi3YyN\njUl6QdExL70YLAdQCwoVrBqCpGg3o6Oj2rPnBklXSdoh6Srt2XMDt32AOpBRwaojSIp2Mzs7q6mp\nKY2NjVGkoC0QpgUAAJlFmBYAALQFChUAAJBZFCqoaGFhQdPT01pYWMjM/tJuEwAg2yhUkOjQocPa\ntu0S7dr1CW3bdokOHTrc8v2l3SYAQPYRpkWZtGeQTWN/zGoLAGsHYVo0VdozyKaxP2a1BYD2RKGC\nMmnPIJvG/pjVFgDaE4UKylSbQXYlYdY0ZqStdx9ZDAJn0Vo6r7XUVgApcveW/hSagCyan5/3qakp\nn5+fd3f3gwfv8Xx+iw8OXu75/BY/ePCehvaXRpuSNNrOZu8vK9bSea2ltgJwD/62p1InEKZFTdZK\nmDWLQeAsWkvntZbaCqCAMC1W3VoJs2YxCJxFa+m81lJbAaSPQqUNreRe/1oJs2YxCJwV0eteel4L\nkg7q1KnnM3le6+kaAKgfhUqbWemkaWkEYldD2u1cK+e9nPh1f/DBh7R//53q7n6vpBFJt+vMGdeD\nDz7U4paWWy/XAMDKkFFpI2lNvDY3N6eRkZFM/6FIu51r5byTVLruDz88qSuueO+ayX6s5WsAtJs0\nMypdaezEzPZL+nlJx919Z7Bss6TDkrZJmpP0YXd/NY3jYWXCe/0nTpTf66/1F//Q0NCa+CORdjvX\nynknqXTdp6amGh4Pq2ktXwMAK5fWrZ8/lfSzsWW3SHrQ3d8q6SFJ
t6Z0LKwQ9/rbU6XrPjY2xngA\nkHmpFCruPinpx7HFH5B0IHh8QNK1aRwLK7da9/obnZhrdnZWBw4c0OzsbNX9tWoCsLU28Vil6z46\nOlrzeFhr59wusnRdstQWrDNpTciiwi2exyP//lHs+R9V2C6l6WVQqzQmXquk0Ym59uy5yaW8Sztc\nyvuuXdck7q9VE4Ct5YnHKl335cbDWj7n9SxL1yVLbUE2KMUJ31azUHmlwnZN6CK0wvz8vOfzW1x6\nzCV36THP57fUXBDNzMwERUq4/dHYvwv7m5mZaeg4rTq/tagdz3ktyNJ1yVJbkB1pFiqphGkrOG5m\nw+5+3MzOlTRfacW9e/cuPR4fH9f4+HgTm4VmaTSsOzU1JWmrihN79Um6QPGJvloVAk0jjLzWtOM5\nrwVZui5ZagtaZ2JiQhMTE03Zd5qFigU/oW9I+pikz0v6qKSvV9owWqhg7SoNbRY+7lpPOHNsbEzS\nC5HtX5P0Ytn+SkOg9R9npRo9v7WoHc95LcjSdclSW9A68TcZPvWpT6W271TCtGZ2UNJ/k7TDzP7W\nzH5N0uck7TKzpyS9L/g3MiYMrt5///0lAdblJAXnGgnrLiwsaHFxUddf/xFJV0naIenntHv3eMn+\nvvjFz2lxcVFf/OLnaj5OPJxbadlyajm/9RYojJ5zX987mGwtI7I0CV6W2oJ1Kq17SCv9ERmVlikG\nV7cE/32LS3nfs+fGqtstF5yrN6wb3V9390bv6urz3t43e0/PgB88eM/S/vbtu6vkuPv23bXsceLh\n3D17bkxcVo9K57deA4UHD97jvb2bvK/vrd7bu2ndnNd60Mxg/FpuC1pPWQzTrrgBFCotUQyu3lsW\nWJXyPjMzk7hd2sG50v3Nu7Q5cd8rOW55OPcxl3rqOt+Vncf6CRSu1/MC0FxpFip810+bKgZXX1Vp\ngHWnpAuC58s199uJ5yRdmLjvlRy3PJy7U9Kg4gHdaue7svOorX1rwXo9LwBrB4VKmyoGVwdVDLAq\n+O+LwfPlmvvtxCOSnk/c90qOWxrODc/tVRUDuuGyyue7svOorX1rwXo9LwBrB4VKmxodHdWePTdI\nuk5SrwoB1rdIukp79tyg0dHRkvXDkKikJn478c+qq+uUurt/Shs3Xlay75UE9ornGIZzr9KePR+P\nBXaTzzd6zrXOiHvbbTev6UBh9Lxqud6NBIdXM3S83gLOQNtJ6x7SSn9ERqWlZmZm/O677/b77rvP\n77777sSsRlJINO3g3Pz8vH/605/xfH6Lb9x4qff0DPi+fXclrlfvccNznJmZWTqXvr5R7+7u8y98\n4Y8St6kUjE1aHl3W27vJP/3pz6y5DEc80JzLDVa93o0Eh1czdLxeA85A1okwLVbLaoUpV+M4tR6j\n0npJM+L29m5a82HTWgPNyevXd86rGc4lCAy0TpqFCrd+UNVqhSlX4zi1HqPSeuGMuNHlnZ3nqKNj\n67L7zLJaA83J6yevU9ux6tu2XgSBgfWBQqXNhffvZ2dnE+/jl4cpJ3Ty5LPq7+9P9d7/aoQ2az1G\npfVKZ8QtLH/jjXmdOfNC4j7XSjaiv79fr7/+nJIDzcXrHWrkWtW6bVLf1duf6y0IvFbGE5C6tN6a\nWemPuPXTMuH9+3z+Ipfyns9fmngfP1yvt/fCpfWScgxptWdg4LKm5QlqPUal9ZKWV1uW9WxEcQyE\n1/bt3t3d77ncYMn1jp9DI9dquW2XywHVc7zVGFOrYa2MJyAkMipoVPH+/dFgZtrq9/FnZma8p2dT\nzTmGRtrV7Nktaz1GpfWSlkeXrZVsRHk7j3pPz4DPzMzErnflrMpKr1W1vk07B7TWZ0xdK+MJiEqz\nUGnmtycjw4rfeNqnwtv91b/5dHFxUb29F+nkyZ2SplUpx9Dox3HDjyI3U63HqLRe0vLosunp6TXx\nbbLl33o7rp6ei7W4uChJkest
JZ1DI9eq0rZJ38Tb2XmOpLxWOt5WY0w1E99OjHZHRqVNFe/fv6ZC\ngLKe3MaIKk3MhrWTjajWzladQ9Jxq+WA2sFaGU9As/COSptYWFjQ3NycRkZGSiZQ+/Vf/0W5D+j1\n169SPn+xpO8nTlRWXP9qdXdv04kTp2X2f6i39yKdPn1sRZObLSws6NFHH5Ukbd26VYuLi0vtq9b2\n+PL+/v6K266kLZdddtnSfiodu5qwr66/flxm58v9Je3f//829f9+G2vnT6uz8xy98ca89u/ft7R9\n9HqfOvW8brvtd1elzbfddrM++9nCcQtja58kLbWl3vFWa99UGgOtFn/treT1tpLxkaZWHx9rXFr3\nkFb6IzIqTVctiBfev5+Zmak7t9HIvf+DB+/x7u6NLm1w6bxlQ5uVJl5bLghcf1u2ey432HAgtvAN\nzb0uvdml3rq/obne9jc6+Vpf3zsqfhN2OBFfM4LT9Uyct5LxVmvfVBoDWbLS11urg7itPj5aQ4Rp\nUassBvHm5+e9t3dTEMitHOZdfuK12oLAtbclnQBn8rc2N/4NzZXa38zJ15oxfpoRmF3p+YXrJY2B\nVr9O0tDq13+rj4/WSbNQIaOyzmVx0qu5uTl1dg6rEMitHOZdfuK1ytuurC3Rb1Q+e8UTuSV/a3Pj\n39CcpNmTrzVj/CTtsxkT59UzwV/SGOjouGDNTw7X6td/q4+P9YFCZZ3LYhBvZGREb7xxXIVAbuUw\n7/ITr9UWBK69LdFvVP7higOcyd/a3Pg3NCdp9uRrzRg/qxWYrWeCv6QxcObMi2s+sNrq13+rj491\nIq23Zlb6I279NF2lSa8anU+kHklfatfd3R9kAoaXJhuLtm9yctJ/4Rd+0Xt6NlWceK23dyRx23ra\n9elPf8a7uvqCtlzs3d0DFSdyq9WePTcGt3/e4lK+YkYlKSO0XH+HX7I4OTnpU1NTvm/fXU2bfK3W\ndeLns9xYOXjwHu/t3eR9fTu8t3dTWX+n9eWO1doe/7LK4ni8eFUyKsvNx5OWZly/lRy/v//t3tMz\n4Lff/qmyL0CtNyuH7BMZFdRrpd9+m0YQrtI+5ufn/YEHHvAHHnig7BfUrl3XRP7Q9/q73vVPKk68\nttJfbqWB3J7geBd4d/fGuou5JNE/hMsfv7YZfwsh3bxLb3Ip77ncqOfzW3zfvrtSn3yt3nXqGSuV\nQrzNCO8mtb3YjzuWCsnoeGz2H8o0Z9+tRdrXr1779t3lPT2bvLNzuKzf0wzGIzsoVNCQRr9FuN6w\nar37mJycTAyjTk5OrvicK7frqEurH6RMnhm4+oy/xZBudJvVaW/t57N8m6qtuxrhy9UMOydZrTBx\no21Kc7bpwr7vTej3Xu/trR6qx9qUZqFCRqUNNfotwvWGVevdx5EjRyRdoNIw6vnB8nQU29UnafWD\nlKXHH1Et31xcDOk2HiJOWz3Xudq6qxG+XM2wc5LVChM32qa0jl/c96sq7/chmZ2vLI5pZAeFShuq\nJ2TYaBBuJfvYvXu3pBdVGkZ9KViejmK7XpO0+kHK0uPPqZYZf4sh3cZDxGmr5zpXW3c1wperGXZO\nslph4kbblNbxi/seVHm/L8j9JWVxTCND0nprZqU/4tZP6pKyH9Ecx8zMjH/iE7/pvb2blwJ20YxD\nNPvx6U9/xnt6Bryn53zv7u7zffvuWjpGLYHPMKhYb5h39+4wo7LdpZxfdNHFNd36qSdTUhrIza04\nSJk0Cd7MzEzVvEOYxejt3ey53Js9/s3Flfrq+utvCPrlvJKMSlJ7mxWWrrR+tdBmPE/0hS/8kff0\nbPKNG9+5FJwN++yWW26tKfw5MzPjd9xxhx8+fLhiH1c6r+uu+0hi2LmRTFI94dhavoU7njtaLvPU\nqOWOHz2fycnJmtoSrh8Gvru6hsr6Pa1gPLJFZFRQSdKMr93d4R/CS72jIx8Js/X6hz70S0u/RA
YH\nL18Kc4bBto6OoSBoWvgj3tW10ffsualq6K5SULHeMO/k5KRfeOGOkl9su3dfU/Xc6w0D1lpY1HLM\n0r4L+6x8ltPS8GCv9/Rc7D09A0ufclmur77whT8q+dRPUnubFZZebv2kP8zxsGQ4Hnt7R72zc4N3\ndw+U9Vl398aqn/opjLHScVnLjMbFbfMubXOp26+77pdX1BfL9ctK+ir+hz3cthguL76mmqHS8aPn\n091dHoitpU/CwiepyOFTP+sPhQoSlc6weTQIp4X/fcylpBBhGGaLhjnDbe71wjTw0YDn0bJ9JAc+\nKwcVaw3u1ROqbcUMmKXHjPZd5XBucoi2cntXcl7NCks31pak8Vhbn8UVxlh8XD7mvb2blw3kVhqf\nk5OTKx4/aYdjy/eXFEJtXvi3+vnU1pZWvB6RLWkWKmRU1pHkGV+jIbWkEGEYZouGOcNtXpV0jkoD\nnn2KB12TA5+Vg4q1BvfqCdW2YgbM0mPOqdh3lcO5ySHayu1dyXk1KyzdWFuSxuOcaumzuMJYio/L\nnTI7f9lAbqXxeeTIkVRn+G0kHFu+v1dV/jpoXvi3+vkkBWLL28KMtEgThco6kjzjazSklhQiDMNs\n0TBnuM2gpHmVBjxfUzzomhz4rBxUrDW4V0+othUzYJYec0TFvqsczk0O0VZu70rOq1lh6cbakjQe\nR1RLn8UVxlJ8XD4u95eWDeRWGp+7d+9OdYbfRsKx5fsbVPnroHnh3+rnkxSILW9LK16PWMfSemtm\npT/i1k+qijNs5oO32PPe3b3Vw5BaR0evRzMf1133yyXB2q6uPu/q6vfu7kK+paPjLC8ETfMune8d\nHRv8Qx/6pZLQXZiZCN/+rWVW1nBm0g0bLvJcrr9iSLcYqn2zS93+7ne/t+K5h5NKbdz4Tu/pGfCP\nf/w3S3IcYRjxvvvuW2pvdJbXahmVSkHGaACxu7s/6L+zvVo4Nzz3XO48L2RULvKengG/8cab/I47\n7vA/+7M/89///d9fusV18OA93tMz4L29b/Zcrj/xW4W/8pWv+M033+z33XdfxdlqkyY027fvLs/l\nBnzDhh0lOZm4eHahUuAy6Ril59vjHR2bvZBRucQ7O/Pe3T0QCTT3ujTkXV19S21OCip/6EO/VNLH\nnZ19S4Hcau0sbtvr0laPfrN1OH42bLi0JOBbLSBbqV8OHrzH9+27y7u7BzyXG/Wenk11TSL4hS/8\nkedyg97f/w7v7d3k73rXu4PXwYUu9fh11/1yTe2KXpd4PiQ+w3F0u6Swc3Igttff//4PlE3aGIbF\naw3nRpfW4KtnAAAU/0lEQVTFX2vLfTigmnrC0Y0EqVFOZFRQycGD90Smg9/uXV39Zb9ww18E1133\nKx4N1r7rXe8u+ar7cNvbb/+Ud3cPlIRyw19e11//Gx4P1kX/sPb0DFSclj16rFxuMDGke/DgPW5W\nWlxVKnzy+S2+ceOlbhbOMlucvbUYIt4S2Vd3ZL3K4dekcHBU8RM5v1HSn+Ev8Eqf+MjlCp9y6Ozc\nGmlLf8m57t59TcXZaMP+Kc6qe16wv7eW/WGo1N/x4HXSrKCVQpHxwGXp/or9GG7f2XlO5Nxy3tnZ\n7xs3FqfLLx2Ped+165qKIe98/lLv6RnwX/3Vj/mNN/5OSVg3PId4O+Nh3u7uUe/t3bzUxkLb8y6d\nVbJetE+SwtPxfgn7u3BdigXR7t3X1BTYLV7vi1zq9s7Ofh8cvNw7Ozd4Z+dG7+kZqatd4euqNAQb\nHfuFMRXv42jYOSkQ+4lP/JZ3dYVfO1AcP9Hjx4uc5cLG8dfarl3XVP1wQFpB8WbOytuuKFSQqDRM\nWz3EVh4qPOrJAcUwRHfU4+HPnp6B2D7i4dzk4ye382jZvnp7N1U4RrVwbtLsreGyaBAwul7lPqt1\nFtNa10sOl4bneHfCPnq8/HyK/ZPL9Vd8PhrgTe7v8HpX3z
YpFDkzMxNbvtz4ifZ9+Qy85df5aML6\nyX1QLZxcbGe0v8v3UT2EHj9W9VmE5+fnI9el9LXR07MpcZvkcRQ9Ri19kNSusB8rjf2kYHNt51je\nZ8nbJAWLk5Ylj4H4GK08VpNfZ8uHeQn+NkeahQoZlXWknq+rLw8V9kkaKttWOjsI0ZWHP802qXo4\nt7AsHqJLbmdf2b46O8+R+2DCMaqFc5Nmbw2XRYOA0fUq91mts5jWul5yuDTsx+dUHpocDJaV939n\n5zk6c2ZDxeejs70m93cYSK2+bVIocmpqKrY8ur+k8RPt+7my9crHUjS0Ha6f3AeVxmdpO5PCvMV9\nFMd+9fWKAdnyc4j2WVLoXDpb0psStwmVjqPoMWrpg6R2ha+rSmN/pMr+q59jeXA/eZukYHHSsuQx\nsPx1aTQoTvA3+7pa3QCkp7+/X6dP/0CFjPTjKrzwCqHE/v5+TU9Pa2RkRENDQ7FQ4U4VAo0LkhYl\nTajwC+E1ST/UmTM/UmkIsrBf959IOl2yrBDOXSxZFg/RFUO/FjnWTKw9hQCf2RlJP4wsn5D0t9q+\nfXvJ/orBvaTZW8Nl0SBgdL2wLeV9ls/nY+36mqTn9Oijj2psbEyjo6NaWFhIWG9C0vOamJjQ9u3b\n9Z73vCfW1mi4NOzHs1UMTYb9+aqk/5XY///wDy9LeqPi89F+L/Z3+PyMCoHUpG0n9PrrT+v555/X\nhg0bdOLEs8Fz3ZL+Qq+//rTy+bxef/25yDYvBfv7u5L9nDnzcrBd2PcTkk5JelbVx9JLQV+E6z8X\ntPnZkv2fPv19dXR0x87hPEnf1OuvP62XXnopaOffSHo62O9zKu2n7+vMGakw/sPny/d3+vT3ZdYV\nLOsvO4ewv7/97W/r9Om/i/XF1yS9rDNnSvvn5Mlndfz4cX32s5+VJG3evDnST/OSnlExePxspH1f\nC8bG3+n1118I+iBc7xmV9uMLKgjH53aVvkYOSHoitv/o8Q8Ey87WyZPPqr+/X5KC3zWdkr4p6Xux\nazuhwuv6pcg1KvbFyZMvqrMzV7Ls9OmFoJ3FbQvX5FUlh7EL12VxcVY33XSTrr32Wo2MjOj48ePq\n6urSa689XbLeyZPP6sc//rFmZ2f1xBNPLK03OTkZjPHiccPrcuDAAW3fvl1TU1N6+OGHNTw8rGee\neUbbt2/Xyy+/rCuuuEK/8iu/oqGhIaGJ0nprZqU/4tZPKkonYyqGDKtN0BYPvb797e/04r3r2maO\nTArO1vK18oXtujyarZDCjMB2L81nlK6XNBtr9JjFwHBx/eKyzZFjRPfbXXLsaD6idNvS/omuVwwd\nnxfZd/lEdcU+CzMl57vUGSzrK+uDYqD43GD9HUs5gML1tpLnzbYn9k8hZL0h0gfR844u66my3nnB\nMc4qW1a4fqXLCtmHwmy7ZoOR58rDxvF+KRwj7MOu2HNv8mImorD/wviMt/1Nkb4Nl0X7OHr+SecY\n7q/Qt4V8ROmy6OuhODFbv0sdseNujpx3YdvChIrxa9AfOcZwwnr9kfVL1yucW3i+5yUcP1w/7IPw\nJ5rZiq5X+lrq6IiPvXjfRsd+MTMVXqOurmhOJuddXRtjyzoj7cl7Z2dvpF29Ce2LXrNq/RO9fuF6\n+ci28T4Ls2zR8dMT2aY42aDZBjItCURGBVHJk4jNu/Tn3tMzUPX+azT5X7jnnHwPvdrMkUmfiKmW\noJ+fn4/cjw7bfG/k31Me3p8u5l0mvfCHsHr2JR4YruVTP3fccUeQHQiPXT6pVXd3X+QXVbUcxd3B\nL7DyvMrk5GTsWhWOlcv1x7ILd3tnZ6/fd999kfXvDfZ9t+dy/ZFvnI3mW37fpTtc6qk4Kd5XvvKV\nWJs3BfsO/zsQnEe1TEN0IsDotvFlpfmDYh+XjtHe3k1L13FyctJzucHYevH8T/n16e3d5IcPHw76\nJTqW4hme6HOHI4/jGZ
7wfJLaHD23o97TM+AzMzORCQrvjYyVuyPLov05GFvv3iptuTvWt+H68UzQ\nZ7xYLIfXIN72aN9+xsvHb/T4Ydvj+wjX64s9Hx2P0W0K1yg5t5Pz8nEWPn9H5BzviJzXQNDG2yLH\niue8wvOI9kF0vTu8tjESnuNnItuVZ7FyuUEyLTFpFirc+lkHwnusJ07E799ep66uT0nKK+n+69DQ\nkEZHRzU6Oqrp6engnnNfybphVuPKK6+s+PZmuI+ooaGhiuvPzc0F96N7VTq53FZJ49G9yGwwaM+0\npLdUPI/4McNzi7cpvkySFhcX1dt7kU6ejB47fk/9LJ0+fVKFWxhJOYppFe6lv03SRkkDJduHE9Xl\ncrngWoXHulLd3W+SlNfJk+H6H1Vf35c0PDwcubbXLrWsu/uzKlzTMN/SK+mjkbZ/Wc8888zS7abo\n+Xd3d0fObVrSRZLOj/z3vGC/0UzDAZVmGsKJAM+ObJu0rHj+XV3nBudYPkZzuT9cuo65XE75/MU6\ndSq6Xnj8cJvzFc9/5HIX6sSJE+rpuVCvvx62vS/4GVDyOIueV3zsnx85n2hbwj4L1xtXT8/FWlxc\njExQ+GpwXQaDdeIZkWlJF8fWi7Y53hapkNUYiqwX7e9wveeC7aLXIN72iyPLTqp8/EaPH2am4vsI\n1+uRdFbk+eh4LM+SdHRsCZ6Lvi7C80+6zo9FzvuHQVuiY/S/RtoXHjfa5vA8ktr3WGS9SmMkeo7f\nDtaPtql4Hu7nlfwuQroI064D1SYRq3XiqdLJ4pr7LcIjIyNBJiF6n7zaN6su/83CjbandHKq18ra\nUmjvT2LLo5Pfhe17TUmT4oUT1dU7OVj19cN8S+3fBFyaTYq2eU6lE69VyvPMqXQiwGrLKrW59Lno\ndUwey/FJ2pInHRwbG4t9S2/0WiSNs3hO6fnYMZLOZ0SVxmFxgsJBFa/LoMrzUeE+ktZLasugClmN\n52P7i096946E8600wd6cCoVM0vgNj39RbF/x9U4mHCv+uo6/huJj9dVIG+LX+R2R8462JWzfT0WW\nhftOWi+pfe+IrFdpjETP8V3Bf6NtKp6H2Q+YzK6Z0nprZqU/4tZPKqrlSGrJjIT7KOYY6v8W4Xrb\nW8x+hNmK6Bwn5XmXSt8snFZ7on20Z8+NZX1WmqspZkji7Stcg86y9Soda7lrVG39wrFK27TcF9aV\n5ooKOYFw3BQmBwxzFIOR9eIZkS2R9cIcxVlly5LG4XLfkpu0Xnyiwmi/R/cRbluclCyaPzjXS/MU\n8fMqzc3s2XNj5PVQPJ9q47CYJ9oU2fem2H+3ezGfMZSw3rllbSnsN35dNns8j7Z1a3Tsha+r88ra\nHvZtaS4s5x0dfbFvE99Qsq9CPiq6jw4v7dtoRqWwLJopS8q0lS4rzXZt2TJUoS3hsui5htv2Jaw3\nHBnf4Xo9kfUqjZHzIufYHdmu2O9kVJIpxVs/Vthf65iZt7oN68XCwoLm5ubU39+vxcXFpU/4RJ+L\nLqu0j0cffVSSdNlllzX1rcyFhQUdPXpUx48f1+WXX65cLqdTp07pmWeeWfpETbztkmo6j5W2J7rv\npD6bnZ3VvffeK0n64Ac/uPSpn3j7+vv79a1vfUtPPvmkPvjBD5bdhknad7VrVG39/v7+pU8x/MzP\n/Ezi7a242dlZTU1NaWxsTGeffXbJuOnv79cLLxQ+KdLX17d0PaTCx2e3b9++dK0eeeQRDQ8P69JL\nL9Xi4mLiskptjj+XdL7R9X74wx8utTne79F9hMvDsbR9+3bNzs7qySef1Pj4uIaHh0vGWfS8Xnvt\nNUnFsR99PWzdunWpLeF1Tmr/t771LR05ckQ7duzQsWPHJEk7d+7UK6+8orPOOkuvvPJKSb+fOnVK\nf/3Xf12yXlJbZmdn9eCDD2p4eFjnn3++nnnmGZ111ll6/vnnNTw8rKuvvlpDQ0O6//77
tW/fPvX3\n9+sjH/mIhoeHS/oxOkYXFxd1/PhxTU9Pa/fu3dqxY8fSc+EYOHXqlKanp3XllVdqeHi4bB9/9Vd/\npYcffljXXHONduzYUTIew9d1/DUUvY7xZeH1CJ+///77dejQIV1xxRXasWPHUltyuZwk6Tvf+Y7u\nv/9+/fzP//zSp362bNmi733veyXrhdcv2r6uri49/PDDuvrqq3XixInEMcKnflbGzOTulsq+Wl0k\nUKgAALC+pFmoND2jYmb/1MyeNLPvmdknm308AACwfjT1HRUz61BhJqD3Sfq+CpHxf+HuT0bW4R0V\nAADWkbX0jsqYpKfd/Zi7n5Z0j6QPNPmYAABgnWh2oXK+ivM3S4XPd53f5GMCAIB1gnlUAABAZjV7\nZtqXJL058u8LgmUl9u7du/R4fHxc4+PjTW4WAABIy8TEhCYmJpqy72aHaTslPaVCmPYHKny/+L90\n99nIOoRpAQBYR9IM0zb1HRV3f8PM9kg6osJtpv3RIgUAAKAaJnwDAACpWksfTwYAAFgxChUAAJBZ\nFCoAACCzKFQAAEBmUagAAIDMolABAACZRaECAAAyi0IFAABkFoUKAADILAoVAACQWRQqAAAgsyhU\nAABAZlGoAACAzKJQAQAAmUWhAgAAMotCBQAAZBaFCgAAyCwKFQAAkFkUKgAAILMoVAAAQGZRqAAA\ngMyiUAEAAJlFoQIAADKLQgUAAGQWhQoAAMgsChUAAJBZFCoAACCzKFQAAEBmUagAAIDMolABAACZ\nRaECAAAyi0IFAABkFoUKAADILAoVAACQWRQqAAAgsyhUAABAZlGoAACAzKJQAQAAmUWhAgAAMquh\nQsXM/rmZ/U8ze8PMLo89d6uZPW1ms2a2u7FmAgCAdtToOypPSPqgpL+OLjSzUUkfljQq6RpJd5qZ\nNXgs1GBiYqLVTVhX6M/00Jfpoj/TRX9mV0OFirs/5e5PS4oXIR+QdI+7/4O7z0l6WtJYI8dCbXix\npYv+TA99mS76M130Z3Y1K6NyvqQXIv9+KVgGAABQs67lVjCzb0oaji6S5JJ+z93va1bDAAAAzN0b\n34nZUUk3u/sjwb9vkeTu/vng338l6XZ3/3bCto03AAAAZIq7p5JNXfYdlTpEG/QNSX9uZl9U4ZbP\ndklTSRuldSIAAGD9afTjydea2QuSrpJ0v5n9Z0ly9xlJX5U0I+kvJf2Wp/HWDQAAaCup3PoBAABo\nhqZ86sfM9pvZcTN7PLLsHjN7JPh53szCPMs2M/v7yHN3Rra53MweN7Pvmdm/b0Zbs65CX77DzP67\nmT1qZlNm9o8jzyVOtEdfFtTTn4zN5VXoz51m9t/M7DEz+7qZ9UeeY3xWUE9fMjaXZ2YXmNlDZvZd\nM3vCzG4Mlm82syNm9pSZPWBmg5FtGJ8V1NufqY5Rd0/9R9J7Jb1T0uMVnv9DSf82eLytynrflnRl\n8PgvJf1sM9qb5Z+kvpT0gKTdweNrJB0NHr9N0qMqZI9GJD2j4rtmbd+XK+hPxubK+nNK0nuDxx+T\n9O+Cx4zP9PqSsbl8f54r6Z3B435JT0m6RNLnJf1fwfJPSvpc8JjxmW5/pjZGm/KOirtPSvpxlVU+\nLOlQ5N9lgVozO1fSRnefDhb9f5KuTa2Ra0SFvjwjKfy/gE0qzFMjSe9XwkR79GVRnf0pMTarqtCf\nbwmWS9KDkn4xeMz4rKLOvpQYm1W5+8vu/p3g8aKkWUkXqDAh6YFgtQMq9g/js4oV9KeU0hhd9S8l\nNLOfkvSyuz8bWTwSvDV01MzeGyw7X9KLkXVeFJPGhf6NpD80s7+V9AeSbg2WV5poj76srlJ/SozN\nlfiumb0/ePxhFX6ZSYzPlajUlxJjs2ZmNqLCu1V/I2nY3Y9LhT++ks4JVmN81qjG/pRSGqOt+Pbk\nf6nSd1O+L+nN7n65pJslHYze00ai35R0k7u/WYU/
sn/S4vasdZX68wdibK7E9ZL+tZlNS+qTdKrF\n7VnLKvUlY7NGQb/8hQqv8UUVJiyN4hMldaijP1Mbo6taqJhZp6RfkHQ4XObup939x8HjRyQ9K2mH\nCtXs1sjmF6j0Lfl29lF3/5okuftfSLoyWF6pz+jL6uL9ORY8PsXYrJ+7f8/df9bdr5R0jwr9JjE+\n61apLxmbtTGzLhX+qP5Hd/96sPi4mQ0Hz58raT5YzvhcRj39meYYbWahYiq/P7VL0qy7f39pJbOz\nzawjeHyRCpPDPRe8hfSqmY2ZmUn6VUlfV3uK9+VLZvbTkmRm71PhXqpUmGjvX5hZzswuVDDRHn1Z\nZrn+/F7wmLFZm5L+NLOh4L8dkv6tpH3BU4zP5dXUl4zNmv2JpBl3/1Jk2TdUCCZL0kdV7B/G5/Jq\n7s9Ux2iT0sEHVbilc1LS30r6tWD5n0r6jdi6vyDpf0p6RNL/kPRzkeeukPSECn+Iv9SMtmb9J6kv\nJb076KtHJf13SZdF1r9VhbT6rIJPstCXK+tPxuaK+/NGFT4R8KSkz8bWZ3ym0JeMzZr68z2S3pD0\nneC1/YikfyppiwrB5KckHZG0KbIN4zOl/kxzjDLhGwAAyKxWhGkBAABqQqECAAAyi0IFAABkFoUK\nAADILAoVAACQWRQqAAAgsyhUAABAZlGoAACAzPrfYeewubNZxX4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x13e0c5470>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(thechosen.inferreddate, thechosen.copiesin25yrs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The number of contemporary copies (`copiesin25yrs`) has an interesting relationship to date (`inferreddate`)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Shuffle selected in place; the next cell slices it by position into p, j and t.\n",
    "# NOTE(review): no seed is set in this cell, so the order presumably differs on every run\n",
    "# unless the random module was seeded earlier in the notebook -- confirm.\n",
    "random.shuffle(selected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "p = selected[0 : 200]\n",
    "j = selected[200 : 400]\n",
    "t = selected[400 : ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "p.extend(random.sample(j, 25))\n",
    "p.extend(random.sample(t, 25))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "j.extend(random.sample(p, 25))\n",
    "j.extend(random.sample(t, 25))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "t.extend(random.sample(p, 25))\n",
    "t.extend(random.sample(j, 25))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['docid', 'author', 'realname', 'pseudonym', 'gender', 'nationality',\n",
       "       'authordate', 'inferreddate', 'firstpub', 'latestcomp',\n",
       "       'allcopiesofwork', 'copiesin25yrs', 'enumcron', 'imprint', 'genres',\n",
       "       'subjects', 'category', 'shorttitle'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "patrick = pd.read_csv('patrick.tsv')\n",
    "cols = patrick.columns\n",
    "cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(250, 18)"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "patrick = meta.loc[meta.docid.isin(p), cols]\n",
    "patrick.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "patrick.to_csv('copies/patrickmanycopies.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "jessica = meta.loc[meta.docid.isin(j), cols]\n",
    "jessica.to_csv('copies/jessicamanycopies.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ted = meta.loc[meta.docid.isin(t), cols]\n",
    "ted.to_csv('copies/tedmanycopies.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
